[PATCH] Add extra statistics to explain for Nested Loop

Started by Noname about 5 years ago, 33 messages
#1 Noname
e.sokolova@postgrespro.ru
2 attachment(s)

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different amounts of time and process different numbers of
rows. This makes the average statistics returned by EXPLAIN ANALYZE not
very useful for a DBA.
To fix this, here is a patch that adds printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYZE.
Please don't hesitate to share any thoughts on this topic!
--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v0.patch text/x-diff; name=extra_statistics_v0.patch Download
From 7871ac1afe7837a6dc0676a6c9819cc68a5c0f07 Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 4 Sep 2020 18:00:47 +0300
Subject: Add min and max statistics without case of
 parallel workers. Tags: commitfest_hotfix.

---
 src/backend/commands/explain.c    | 19 +++++++++++++++----
 src/backend/executor/instrument.c | 13 +++++++++++++
 src/include/executor/instrument.h |  4 ++++
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c98c9b5547c..a10a2cfdfee 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1571,17 +1571,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double      min_r = planstate->instrument->min_tuples;
+        double      max_r = planstate->instrument->max_tuples;
+		double      min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double      max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
 			if (es->timing)
 				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+								 " (actual time=%.3f..%.3f min time %.3f max time %.3f rows=%.0f min rows %.0f max rows %.0f loops=%.0f)",
+								 startup_ms, total_ms, min_t_ms, min_t_ms, rows, min_r, max_r, nloops);
 			else
 				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+								 " (actual rows=%.0f min rows %.0f max rows %.0f loops=%.0f)",
+								 rows, min_r, max_r, nloops);
 		}
 		else
 		{
@@ -1589,10 +1593,16 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
 									 3, es);
+                ExplainPropertyFloat("Min Time", "s", min_t_ms,
+                                     3, es);
 				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
 									 3, es);
+                ExplainPropertyFloat("Max Time", "s", max_t_ms,
+                                     3, es);
 			}
+            ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
 			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+            ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
@@ -1602,6 +1612,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+		    // without min and max values because actual result is 0
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index fbedb5aaf60..a46201c9284 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,7 +134,20 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+	if (instr->nloops == 0)                     // this is first loop
+    {
+        instr->min_t = totaltime;               // init min_t
+        instr->min_tuples = instr->tuplecount;  // init min_tuples
+    }
+	if (instr->min_t > totaltime)
+	    instr->min_t = totaltime;
+	if (instr->max_t < totaltime)
+        instr->max_t = totaltime;
 	instr->ntuples += instr->tuplecount;
+    if (instr->min_tuples > instr->tuplecount)
+        instr->min_tuples = instr->tuplecount;
+    if (instr->max_tuples < instr->tuplecount)
+        instr->max_tuples = instr->tuplecount;
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 9dc3ecb07d7..eb95bbcbd9a 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,11 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+    double      min_t;          /* time of fastest loop (in seconds) */
+    double      max_t;          /* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+    double      min_tuples;    /* min counter of produced tuples for all loops */
+    double      max_tuples;    /* max counter of produced tuples for all loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
-- 
2.26.0


From ebdfe117e4074d268e3e7c480b98d375d1d6f62b Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 11 Sep 2020 23:04:34 +0300
Subject: Add case of parallel workers. Tags:
 commitfest_hotfix.

---
 src/backend/commands/explain.c    | 36 +++++++++++++++++++++----------
 src/backend/executor/instrument.c |  6 +++---
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index a10a2cfdfee..b60d17542d5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1580,12 +1580,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		{
 			if (es->timing)
 				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f min time %.3f max time %.3f rows=%.0f min rows %.0f max rows %.0f loops=%.0f)",
-								 startup_ms, total_ms, min_t_ms, min_t_ms, rows, min_r, max_r, nloops);
+								 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+								 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
 			else
 				appendStringInfo(es->str,
-								 " (actual rows=%.0f min rows %.0f max rows %.0f loops=%.0f)",
-								 rows, min_r, max_r, nloops);
+								 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+                                 min_r, rows, max_r, nloops);
 		}
 		else
 		{
@@ -1593,10 +1593,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
 									 3, es);
-                ExplainPropertyFloat("Min Time", "s", min_t_ms,
-                                     3, es);
 				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
 									 3, es);
+                ExplainPropertyFloat("Min Time", "s", min_t_ms,
+                                     3, es);
                 ExplainPropertyFloat("Max Time", "s", max_t_ms,
                                      3, es);
 			}
@@ -1612,7 +1612,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
-		    // without min and max values because actual result is 0
+		    /* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1638,13 +1638,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		nloops = instrument->nloops;
 			double		startup_ms;
 			double		total_ms;
+            double      min_t_ms;
+            double      max_t_ms;
 			double		rows;
+            double      min_r;
+            double      max_r;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
+            min_t_ms = 1000.0 * planstate->instrument->min_t;
+            max_t_ms = 1000.0 * planstate->instrument->max_t;
 			rows = instrument->ntuples / nloops;
+            min_r = planstate->instrument->min_tuples;
+            max_r = planstate->instrument->max_tuples;
 
 			ExplainOpenWorker(n, es);
 
@@ -1653,12 +1661,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				ExplainIndentText(es);
 				if (es->timing)
 					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
+									 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+                                     startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
 				else
 					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
+									 "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+                                     min_r, rows, max_r, nloops);
 			}
 			else
 			{
@@ -1668,8 +1676,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										 startup_ms, 3, es);
 					ExplainPropertyFloat("Actual Total Time", "ms",
 										 total_ms, 3, es);
+                    ExplainPropertyFloat("Min Time", "ms",
+                                         min_t_ms, 3, es);
+                    ExplainPropertyFloat("Max Time", "ms",
+                                         max_t_ms, 3, es);
 				}
+                ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
 				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+                ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
 				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index a46201c9284..30ccc876e7c 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,10 +134,10 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
-	if (instr->nloops == 0)                     // this is first loop
+	if (instr->nloops == 0)                       /* this is first loop */
     {
-        instr->min_t = totaltime;               // init min_t
-        instr->min_tuples = instr->tuplecount;  // init min_tuples
+        instr->min_t = totaltime;                 /* init min_t */
+        instr->min_tuples = instr->tuplecount;    /* init min_tuples */
     }
 	if (instr->min_t > totaltime)
 	    instr->min_t = totaltime;
-- 
2.26.0


From ecbf04d519e17b8968103364e89169ab965b41d7 Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 18 Sep 2020 13:35:19 +0300
Subject: Fix bugs. Tags: commitfest_hotfix.

---
 src/backend/commands/explain.c    | 142 ++++++++++++++++++++----------
 src/backend/executor/instrument.c |  16 ++--
 2 files changed, 105 insertions(+), 53 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b60d17542d5..d700a4168c7 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1578,32 +1578,57 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
-			else
-				appendStringInfo(es->str,
-								 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
-                                 min_r, rows, max_r, nloops);
+            if (nodeTag(plan) == T_NestLoop) {
+                if (es->timing)
+                    appendStringInfo(es->str,
+                                     " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+                                     startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+                else
+                    appendStringInfo(es->str,
+                                     " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+                                     min_r, rows, max_r, nloops);
+            }
+            else
+            {
+                if (es->timing)
+                    appendStringInfo(es->str,
+                                     " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+                                     startup_ms, total_ms, rows, nloops);
+                else
+                    appendStringInfo(es->str,
+                                     " (actual rows=%.0f loops=%.0f)",
+                                     rows, nloops);
+            }
 		}
 		else
 		{
-			if (es->timing)
-			{
-				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-									 3, es);
-                ExplainPropertyFloat("Min Time", "s", min_t_ms,
-                                     3, es);
-                ExplainPropertyFloat("Max Time", "s", max_t_ms,
-                                     3, es);
-			}
-            ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-            ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+            if (nodeTag(plan) == T_NestLoop) {
+                if (es->timing) {
+                    ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+                                         3, es);
+                    ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+                                         3, es);
+                    ExplainPropertyFloat("Min Time", "s", min_t_ms,
+                                         3, es);
+                    ExplainPropertyFloat("Max Time", "s", max_t_ms,
+                                         3, es);
+                }
+                ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+                ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+                ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+                ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+            }
+            else
+            {
+                if (es->timing) {
+                    ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+                                         3, es);
+                    ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+                                         3, es);
+                }
+                ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+                ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+            }
 		}
 	}
 	else if (es->analyze)
@@ -1659,32 +1684,57 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
-                                     startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
-				else
-					appendStringInfo(es->str,
-									 "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
-                                     min_r, rows, max_r, nloops);
+                if (nodeTag(plan) == T_NestLoop) {
+                    if (es->timing)
+                        appendStringInfo(es->str,
+                                         "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+                                         startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+                    else
+                        appendStringInfo(es->str,
+                                         "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+                                         min_r, rows, max_r, nloops);
+                }
+                else
+                {
+                    if (es->timing)
+                        appendStringInfo(es->str,
+                                         "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+                                         startup_ms, total_ms, rows, nloops);
+                    else
+                        appendStringInfo(es->str,
+                                         "actual rows=%.0f loops=%.0f\n",
+                                         rows, nloops);
+                }
 			}
 			else
 			{
-				if (es->timing)
-				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
-                    ExplainPropertyFloat("Min Time", "ms",
-                                         min_t_ms, 3, es);
-                    ExplainPropertyFloat("Max Time", "ms",
-                                         max_t_ms, 3, es);
-				}
-                ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-                ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+                if (nodeTag(plan) == T_NestLoop) {
+                    if (es->timing) {
+                        ExplainPropertyFloat("Actual Startup Time", "ms",
+                                             startup_ms, 3, es);
+                        ExplainPropertyFloat("Actual Total Time", "ms",
+                                             total_ms, 3, es);
+                        ExplainPropertyFloat("Min Time", "ms",
+                                             min_t_ms, 3, es);
+                        ExplainPropertyFloat("Max Time", "ms",
+                                             max_t_ms, 3, es);
+                    }
+                    ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+                    ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+                    ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+                    ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+                }
+                else
+                {
+                    if (es->timing) {
+                        ExplainPropertyFloat("Actual Startup Time", "ms",
+                                             startup_ms, 3, es);
+                        ExplainPropertyFloat("Actual Total Time", "ms",
+                                             total_ms, 3, es);
+                    }
+                    ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+                    ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+                }
 			}
 
 			ExplainCloseWorker(n, es);
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 30ccc876e7c..79ea3cf94ba 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,15 +134,17 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
-	if (instr->nloops == 0)                       /* this is first loop */
+	if (instr->nloops == 0)                                /* this is first loop */
     {
-        instr->min_t = totaltime;                 /* init min_t */
-        instr->min_tuples = instr->tuplecount;    /* init min_tuples */
+	    if (instr->min_t == 0)
+            instr->min_t = totaltime - instr->firsttuple;  /* init min_t */
+        if (instr->min_tuples == 0)
+            instr->min_tuples = instr->tuplecount;         /* init min_tuples */
     }
-	if (instr->min_t > totaltime)
-	    instr->min_t = totaltime;
-	if (instr->max_t < totaltime)
-        instr->max_t = totaltime;
+	if (instr->min_t > totaltime - instr->firsttuple)
+	    instr->min_t = totaltime - instr->firsttuple;
+	if (instr->max_t < totaltime - instr->firsttuple)
+        instr->max_t = totaltime - instr->firsttuple;
 	instr->ntuples += instr->tuplecount;
     if (instr->min_tuples > instr->tuplecount)
         instr->min_tuples = instr->tuplecount;
-- 
2.26.0


From 7566a98bbc33a24052e1334b0afe2cf341c0818f Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 25 Sep 2020 20:09:22 +0300
Subject: Fix tests. Tags: commitfest_hotfix.

---
 src/backend/executor/instrument.c             | 31 +++++++++++------
 src/test/regress/expected/partition_prune.out | 34 +++++++++----------
 src/test/regress/expected/select_parallel.out | 12 +++----
 3 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 79ea3cf94ba..42b2caade01 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -118,9 +118,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 
 /* Finish a run cycle for a plan node */
 void
-InstrEndLoop(Instrumentation *instr)
-{
-	double		totaltime;
+InstrEndLoop(Instrumentation *instr) {
+	double totaltime;
 
 	/* Skip if nothing has happened, or already shut down */
 	if (!instr->running)
@@ -136,20 +135,32 @@ InstrEndLoop(Instrumentation *instr)
 	instr->total += totaltime;
 	if (instr->nloops == 0)                                /* this is first loop */
     {
-	    if (instr->min_t == 0)
+        if (instr->min_t == 0)
+        {
             instr->min_t = totaltime - instr->firsttuple;  /* init min_t */
+        }
         if (instr->min_tuples == 0)
+        {
             instr->min_tuples = instr->tuplecount;         /* init min_tuples */
-    }
+        }
+	}
 	if (instr->min_t > totaltime - instr->firsttuple)
+	{
 	    instr->min_t = totaltime - instr->firsttuple;
+	}
 	if (instr->max_t < totaltime - instr->firsttuple)
-        instr->max_t = totaltime - instr->firsttuple;
+	{
+		instr->max_t = totaltime - instr->firsttuple;
+	}
 	instr->ntuples += instr->tuplecount;
-    if (instr->min_tuples > instr->tuplecount)
-        instr->min_tuples = instr->tuplecount;
-    if (instr->max_tuples < instr->tuplecount)
-        instr->max_tuples = instr->tuplecount;
+	if (instr->min_tuples > instr->tuplecount)
+	{
+		instr->min_tuples = instr->tuplecount;
+	}
+	if (instr->max_tuples < instr->tuplecount)
+	{
+		instr->max_tuples = instr->tuplecount;
+	}
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 50d2a7e4b97..db0b167ef4a 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2065,7 +2065,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2099,7 +2099,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2132,7 +2132,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,3}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2164,7 +2164,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2198,7 +2198,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2441,7 +2441,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
    Update on ab_a1_b1 ab_a1_1
    Update on ab_a1_b2 ab_a1_2
    Update on ab_a1_b3 ab_a1_3
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2461,7 +2461,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Recheck Cond: (a = 1)
                      ->  Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2482,7 +2482,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Heap Blocks: exact=1
                      ->  Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2523,7 +2523,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
    Update on ab_a1_b3 ab_a1_3
    InitPlan 1 (returns $0)
      ->  Result (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b1 ab_a1_1 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2533,7 +2533,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b2 ab_a1_2 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2543,7 +2543,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b3 ab_a1_3 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2589,7 +2589,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=6 loops=1)
+ Nested Loop (actual min_rows=6 rows=6 max_rows=6 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=3 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=2)
@@ -2610,7 +2610,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=2 loops=1)
+ Nested Loop (actual min_rows=2 rows=2 max_rows=2 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=1 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2655,7 +2655,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=23 loops=1)
+ Nested Loop (actual min_rows=23 rows=23 max_rows=23 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=5 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=5)
@@ -2676,7 +2676,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=3 loops=1)
+ Nested Loop (actual min_rows=3 rows=3 max_rows=3 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=1 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2740,7 +2740,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 < tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=1 loops=1)
+ Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=1 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2772,7 +2772,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                             QUERY PLAN                             
 -------------------------------------------------------------------
- Nested Loop (actual rows=0 loops=1)
+ Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=0 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 96dfb7c8dd6..65559b2261e 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -547,10 +547,10 @@ alter table tenk2 set (parallel_workers = 0);
 explain (analyze, timing off, summary off, costs off)
    select count(*) from tenk1, tenk2 where tenk1.hundred > 1
         and tenk2.thousand=0;
-                                QUERY PLAN                                
---------------------------------------------------------------------------
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
  Aggregate (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=98000 loops=1)
+   ->  Nested Loop (actual min_rows=98000 rows=98000 max_rows=98000 loops=1)
          ->  Seq Scan on tenk2 (actual rows=10 loops=1)
                Filter: (thousand = 0)
                Rows Removed by Filter: 9990
@@ -581,9 +581,9 @@ begin
 end;
 $$;
 select * from explain_parallel_sort_stats();
-                       explain_parallel_sort_stats                        
---------------------------------------------------------------------------
- Nested Loop Left Join (actual rows=30000 loops=1)
+                           explain_parallel_sort_stats                           
+---------------------------------------------------------------------------------
+ Nested Loop Left Join (actual min_rows=30000 rows=30000 max_rows=30000 loops=1)
    ->  Values Scan on "*VALUES*" (actual rows=3 loops=1)
    ->  Gather Merge (actual rows=10000 loops=3)
          Workers Planned: 4
-- 
2.26.0

example_v0.sqltext/plain; name=example_v0.sqlDownload
#2Pavel Stehule
pavel.stehule@gmail.com
In reply to: Noname (#1)
Re: [PATCH] Add extra statistics to explain for Nested Loop

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is a great feature - the current limited format can sometimes be
pretty messy.

Pavel

--

Show quoted text

Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#3Julien Rouhaud
rjuju123@gmail.com
In reply to: Pavel Stehule (#2)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com> a
écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current limited
format

+1, this can be very handy!

Show quoted text
#4Anastasia Lubennikova
a.lubennikova@postgrespro.ru
In reply to: Julien Rouhaud (#3)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com
<mailto:pavel.stehule@gmail.com>> a écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru
<mailto:e.sokolova@postgrespro.ru>> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in
nested loop
joins can take different time and process different amounts of
entries.
It makes average statistics returned by explain analyze not
very useful
for DBA.
To fix it, here is the patch that add printing of min and max
statistics
for time and rows across all loops in Nested Loop to EXPLAIN
ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current
limited format

+1, this can be very handy!

Cool.
I have added your patch to the commitfest, so it won't get lost.
https://commitfest.postgresql.org/30/2765/

I will review the code next week.  Unfortunately, I cannot give any
feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)

Pavel, Julien, could you please say if it looks good?

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#5Pavel Stehule
pavel.stehule@gmail.com
In reply to: Anastasia Lubennikova (#4)
Re: [PATCH] Add extra statistics to explain for Nested Loop

so 17. 10. 2020 v 0:11 odesílatel Anastasia Lubennikova <
a.lubennikova@postgrespro.ru> napsal:

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com> a
écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current limited
format

+1, this can be very handy!

Cool.

I have added your patch to the commitfest, so it won't get lost.
https://commitfest.postgresql.org/30/2765/

I will review the code next week. Unfortunately, I cannot give any
feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0
loops=2)

This interface is ok - there is not much room for creativity. I can
imagine displaying variance or average - but I am worried about a very bad
performance impact.

Regards

Pavel

Show quoted text

Pavel, Julien, could you please say if it looks good?

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#6Julien Rouhaud
rjuju123@gmail.com
In reply to: Pavel Stehule (#5)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Sat, Oct 17, 2020 at 12:15 PM Pavel Stehule <pavel.stehule@gmail.com> wrote:

so 17. 10. 2020 v 0:11 odesílatel Anastasia Lubennikova <a.lubennikova@postgrespro.ru> napsal:

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com> a écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current limited format

+1, this can be very handy!

Cool.
I have added your patch to the commitfest, so it won't get lost.

Thanks! I'll also try to review it next week.

https://commitfest.postgresql.org/30/2765/

I will review the code next week. Unfortunately, I cannot give any feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)

This interface is ok - there is not too much space for creativity.

Yes I also think it's ok. We should also consider usability for tools
like explain.depesz.com, I don't know if the current output is best.
I'm adding Depesz and Pierre which are both working on this kind of
tool for additional input.

I can imagine displaying variance or average - but I am afraid about very bad performance impacts.

The original counter (rows here) is already an average, right?
Variance could be nice too. Instrumentation will already spam
gettimeofday() calls for nested loops, I don't think that computing
variance would add that much overhead?

#7Pavel Stehule
pavel.stehule@gmail.com
In reply to: Julien Rouhaud (#6)
Re: [PATCH] Add extra statistics to explain for Nested Loop

so 17. 10. 2020 v 6:26 odesílatel Julien Rouhaud <rjuju123@gmail.com>
napsal:

On Sat, Oct 17, 2020 at 12:15 PM Pavel Stehule <pavel.stehule@gmail.com>
wrote:

so 17. 10. 2020 v 0:11 odesílatel Anastasia Lubennikova <

a.lubennikova@postgrespro.ru> napsal:

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com>

a écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested

loop

joins can take different time and process different amounts of

entries.

It makes average statistics returned by explain analyze not very

useful

for DBA.
To fix it, here is the patch that add printing of min and max

statistics

for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current

limited format

+1, this can be very handy!

Cool.
I have added your patch to the commitfest, so it won't get lost.

Thanks! I'll also try to review it next week.

https://commitfest.postgresql.org/30/2765/

I will review the code next week. Unfortunately, I cannot give any

feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0

loops=2)

This interface is ok - there is not too much space for creativity.

Yes I also think it's ok. We should also consider usability for tools
like explain.depesz.com, I don't know if the current output is best.
I'm adding Depesz and Pierre which are both working on this kind of
tool for additional input.

I can imagine displaying variance or average - but I am afraid about

very bad performance impacts.

The original counter (rows here) is already an average right?
Variance could be nice too. Instrumentation will already spam
gettimeofday() calls for nested loops, I don't think that computing
variance would add that much overhead?

It would not be a problem to write a benchmark for the worst case and test it.

#8depesz
In reply to: Julien Rouhaud (#6)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Sat, Oct 17, 2020 at 12:26:08PM +0800, Julien Rouhaud wrote:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)

This interface is ok - there is not too much space for creativity.

Yes I also think it's ok. We should also consider usability for tools
like explain.depesz.com, I don't know if the current output is best.
I'm adding Depesz and Pierre which are both working on this kind of
tool for additional input.

Thanks for heads up. This definitely will need some changes on my side,
but should be easy to handle.

Best regards,

depesz

#9David G. Johnston
david.g.johnston@gmail.com
In reply to: Anastasia Lubennikova (#4)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Fri, Oct 16, 2020 at 3:11 PM Anastasia Lubennikova <
a.lubennikova@postgrespro.ru> wrote:

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0
loops=2)

I'd be inclined to append both new rows to the end.

(actual rows=N loops=N min_rows=N max_rows=N)

rows * loops is still an important calculation.

Why not just add total_rows while we are at it - last in the listing?

(actual rows=N loops=N min_rows=N max_rows=N total_rows=N)

David J.

#10Julien Rouhaud
rjuju123@gmail.com
In reply to: Anastasia Lubennikova (#4)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Sat, Oct 17, 2020 at 6:11 AM Anastasia Lubennikova
<a.lubennikova@postgrespro.ru> wrote:

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com> a écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current limited format

+1, this can be very handy!

Cool.
I have added your patch to the commitfest, so it won't get lost.
https://commitfest.postgresql.org/30/2765/

I will review the code next week. Unfortunately, I cannot give any feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)

Thanks for working on this feature! Here are some comments on the patch.

First, cosmetic issues. There are a lot of whitespace issues, the new
code is space indented while it should be tab indented. Also there
are 3 patches included with some fixups, could you instead push a
single patch?

It is also missing some modifications to the regression tests. For instance:

diff --git a/src/test/regress/expected/partition_prune.out
b/src/test/regress/expected/partition_prune.out
index 50d2a7e4b9..db0b167ef4 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2065,7 +2065,7 @@ select explain_parallel_append('select avg(ab.a)
from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)

You should update the explain_parallel_append() plpgsql function
created in that test file to make sure that both "rows" and the two
new counters are changed to "N". There might be other similar changes
needed.

Now, for the changes themselves. For the min/max time, you're
aggregating "totaltime - instr->firsttuple". Why remove the startup
time from the loop execution time? I think this should be kept.
Also, in explain.c you display the counters in the "Nested loop" node,
but this should be done in the inner plan node instead, as this is the
one being looped on. So the condition should probably be "nloops > 1"
rather than testing if it's a NestLoop.

I'm switching the patch to WoA.

#11Pierre Giraud
pierre.giraud@dalibo.com
In reply to: Julien Rouhaud (#6)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Le 17/10/2020 à 06:26, Julien Rouhaud a écrit :

On Sat, Oct 17, 2020 at 12:15 PM Pavel Stehule <pavel.stehule@gmail.com> wrote:

so 17. 10. 2020 v 0:11 odesílatel Anastasia Lubennikova <a.lubennikova@postgrespro.ru> napsal:

On 16.10.2020 12:07, Julien Rouhaud wrote:

Le ven. 16 oct. 2020 à 16:12, Pavel Stehule <pavel.stehule@gmail.com> a écrit :

pá 16. 10. 2020 v 9:43 odesílatel <e.sokolova@postgrespro.ru> napsal:

Hi, hackers.
For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful
for DBA.
To fix it, here is the patch that add printing of min and max statistics
for time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

+1

This is great feature - sometimes it can be pretty messy current limited format

+1, this can be very handy!

Cool.
I have added your patch to the commitfest, so it won't get lost.

Thanks! I'll also try to review it next week.

https://commitfest.postgresql.org/30/2765/

I will review the code next week. Unfortunately, I cannot give any feedback about usability of this feature.

User visible change is:

-               ->  Nested Loop (actual rows=N loops=N)
+              ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=2)

This interface is ok - there is not too much space for creativity.

Yes I also think it's ok. We should also consider usability for tools
like explain.depesz.com, I don't know if the current output is best.
I'm adding Depesz and Pierre which are both working on this kind of
tool for additional input.

Same for me and PEV2. It should be fairly easy to parse this new format.

I can imagine displaying variance or average - but I am afraid about very bad performance impacts.

The original counter (rows here) is already an average right?
Variance could be nice too. Instrumentation will already spam
gettimeofday() calls for nested loops, I don't think that computing
variance would add that much overhead?

Thus, it's an average value. And worth mentioning: a rounded one! I
found that a bit tricky to figure out.

#12Andres Freund
andres@anarazel.de
In reply to: Noname (#1)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi,

On 2020-10-16 10:42:43 +0300, e.sokolova@postgrespro.ru wrote:

For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries. It
makes average statistics returned by explain analyze not very useful for
DBA.
To fix it, here is the patch that add printing of min and max statistics for
time and rows across all loops in Nested Loop to EXPLAIN ANALYSE.
Please don't hesitate to share any thoughts on this topic!

Interesting idea!

I'm a bit worried that further increasing the size of struct
Instrumentation will increase the overhead of EXPLAIN ANALYZE further -
in some workloads we spend a fair bit of time in code handling that. It
would be good to try to find a few bad cases, and see what the overhead is.

Unfortunately your patch is pretty hard to look at - you seem to have
included your incremental hacking efforts?

From 7871ac1afe7837a6dc0676a6c9819cc68a5c0f07 Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 4 Sep 2020 18:00:47 +0300
Subject: Add min and max statistics without case of
parallel workers. Tags: commitfest_hotfix.

From ebdfe117e4074d268e3e7c480b98d375d1d6f62b Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 11 Sep 2020 23:04:34 +0300
Subject: Add case of parallel workers. Tags:
commitfest_hotfix.

From ecbf04d519e17b8968103364e89169ab965b41d7 Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 18 Sep 2020 13:35:19 +0300
Subject: Fix bugs. Tags: commitfest_hotfix.

From 7566a98bbc33a24052e1334b0afe2cf341c0818f Mon Sep 17 00:00:00 2001
From: "e.sokolova" <e.sokolova@postgrespro.ru>
Date: Fri, 25 Sep 2020 20:09:22 +0300
Subject: Fix tests. Tags: commitfest_hotfix.

Greetings,

Andres Freund

#13Noname
e.sokolova@postgrespro.ru
In reply to: Julien Rouhaud (#10)
1 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

<rjuju123@gmail.com> wrote:

You should update the explain_parallel_append() plpgsql function
created in that test file to make sure that both "rows" and the two
new counters are changed to "N". There might be other similar changes
needed.

Thank you for looking into this issue. I made the necessary changes in the
tests following your advice.

Now, for the changes themselves. For the min/max time, you're
aggregating "totaltime - instr->firsttuple". Why removing the startup
time from the loop execution time? I think this should be kept.

I think it's a fair remark. I fixed it.

Also, in explain.c you display the counters in the "Nested loop" node,
but this should be done in the inner plan node instead, as this is the
one being looped on. So the condition should probably be "nloops > 1"
rather than testing if it's a NestLoop.

The condition "nloops > 1" is not the same as checking whether it's a
NestLoop. This condition will lead to printing extra statistics for nodes
with different types of join, not only Nested Loop Join. If these statistics
are useful for other plan nodes, it makes sense to change the condition.

<andres@anarazel.de> wrote:

I'm a bit worried that further increasing the size of struct
Instrumentation will increase the overhead of EXPLAIN ANALYZE further -
in some workloads we spend a fair bit of time in code handling that. It
would be good to try to find a few bad cases, and see what the overhead
is.

Thank you for this comment, I will try to figure it out. Do you have
some examples of large overhead that depends on this struct? I think I
need a sample case to know where to start looking.

Thank you all for the feedback. I hope the new version of my patch will
be more correct and useful.
Please don't hesitate to share any thoughts on this topic!
--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v1.patchtext/x-diff; name=extra_statistics_v1.patchDownload
From: "Ekaterina Sokolova" <e.sokolova@postgrespro.ru>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 41317f18374..2132d82fe79 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1568,29 +1568,64 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+			if (nodeTag(plan) == T_NestLoop) {
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 min_r, rows, max_r, nloops);
+			}
 			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)",
+									 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nodeTag(plan) == T_NestLoop) {
+				if (es->timing) {
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+					ExplainPropertyFloat("Min Time", "s", min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Max Time", "s", max_t_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+			else
 			{
-				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-									 3, es);
+				if (es->timing) {
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
 	else if (es->analyze)
@@ -1599,6 +1634,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1624,39 +1660,78 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		nloops = instrument->nloops;
 			double		startup_ms;
 			double		total_ms;
+			double		min_t_ms;
+			double		max_t_ms;
 			double		rows;
+			double		min_r;
+			double		max_r;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
+			min_t_ms = 1000.0 * planstate->instrument->min_t;
+			max_t_ms = 1000.0 * planstate->instrument->max_t;
 			rows = instrument->ntuples / nloops;
+			min_r = planstate->instrument->min_tuples;
+			max_r = planstate->instrument->max_tuples;
 
 			ExplainOpenWorker(n, es);
 
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
+				if (nodeTag(plan) == T_NestLoop) {
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 min_r, rows, max_r, nloops);
+				}
 				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
+				{
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, rows, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual rows=%.0f loops=%.0f\n",
+										 rows, nloops);
+				}
 			}
 			else
 			{
-				if (es->timing)
+				if (nodeTag(plan) == T_NestLoop) {
+					if (es->timing) {
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+						ExplainPropertyFloat("Min Time", "ms",
+											 min_t_ms, 3, es);
+						ExplainPropertyFloat("Max Time", "ms",
+											 max_t_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+				}
+				else
 				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
+					if (es->timing) {
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
 			ExplainCloseWorker(n, es);
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index fbedb5aaf60..effe8516dba 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -118,9 +118,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 
 /* Finish a run cycle for a plan node */
 void
-InstrEndLoop(Instrumentation *instr)
-{
-	double		totaltime;
+InstrEndLoop(Instrumentation *instr) {
+	double totaltime;
 
 	/* Skip if nothing has happened, or already shut down */
 	if (!instr->running)
@@ -134,7 +133,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+	if (instr->nloops == 0)					/* this is first loop */
+	{
+		if (instr->min_t == 0)
+		{
+			instr->min_t = totaltime;		/* init min_t */
+		}
+		if (instr->min_tuples == 0)
+		{
+			instr->min_tuples = instr->tuplecount;	/* init min_tuples */
+		}
+	}
+	if (instr->min_t > totaltime)
+	{
+		instr->min_t = totaltime;
+	}
+	if (instr->max_t < totaltime)
+	{
+		instr->max_t = totaltime;
+	}
 	instr->ntuples += instr->tuplecount;
+	if (instr->min_tuples > instr->tuplecount)
+	{
+		instr->min_tuples = instr->tuplecount;
+	}
+	if (instr->max_tuples < instr->tuplecount)
+	{
+		instr->max_tuples = instr->tuplecount;
+	}
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 9dc3ecb07d7..996433e9bb1 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,11 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all loops */
+	double		max_tuples;		/* max counter of produced tuples for all loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 50d2a7e4b97..ba864a57677 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1929,6 +1929,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
@@ -2065,7 +2067,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2099,7 +2101,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2132,7 +2134,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,3}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2164,7 +2166,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2198,7 +2200,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2441,7 +2443,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
    Update on ab_a1_b1 ab_a1_1
    Update on ab_a1_b2 ab_a1_2
    Update on ab_a1_b3 ab_a1_3
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2461,7 +2463,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Recheck Cond: (a = 1)
                      ->  Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2482,7 +2484,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Heap Blocks: exact=1
                      ->  Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2523,7 +2525,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
    Update on ab_a1_b3 ab_a1_3
    InitPlan 1 (returns $0)
      ->  Result (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b1 ab_a1_1 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2533,7 +2535,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b2 ab_a1_2 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2543,7 +2545,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b3 ab_a1_3 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2589,7 +2591,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=6 loops=1)
+ Nested Loop (actual min_rows=6 rows=6 max_rows=6 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=3 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=2)
@@ -2610,7 +2612,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=2 loops=1)
+ Nested Loop (actual min_rows=2 rows=2 max_rows=2 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=1 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2655,7 +2657,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=23 loops=1)
+ Nested Loop (actual min_rows=23 rows=23 max_rows=23 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=5 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=5)
@@ -2676,7 +2678,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=3 loops=1)
+ Nested Loop (actual min_rows=3 rows=3 max_rows=3 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=1 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2740,7 +2742,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 < tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=1 loops=1)
+ Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=1 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2772,7 +2774,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                             QUERY PLAN                             
 -------------------------------------------------------------------
- Nested Loop (actual rows=0 loops=1)
+ Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=0 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 9b0c418db71..e8ccd903b41 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -547,10 +547,10 @@ alter table tenk2 set (parallel_workers = 0);
 explain (analyze, timing off, summary off, costs off)
    select count(*) from tenk1, tenk2 where tenk1.hundred > 1
         and tenk2.thousand=0;
-                                QUERY PLAN                                
---------------------------------------------------------------------------
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
  Aggregate (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=98000 loops=1)
+   ->  Nested Loop (actual min_rows=98000 rows=98000 max_rows=98000 loops=1)
          ->  Seq Scan on tenk2 (actual rows=10 loops=1)
                Filter: (thousand = 0)
                Rows Removed by Filter: 9990
@@ -581,9 +581,9 @@ begin
 end;
 $$;
 select * from explain_parallel_sort_stats();
-                       explain_parallel_sort_stats                        
---------------------------------------------------------------------------
- Nested Loop Left Join (actual rows=30000 loops=1)
+                           explain_parallel_sort_stats                           
+---------------------------------------------------------------------------------
+ Nested Loop Left Join (actual min_rows=30000 rows=30000 max_rows=30000 loops=1)
    ->  Values Scan on "*VALUES*" (actual rows=3 loops=1)
    ->  Gather Merge (actual rows=10000 loops=3)
          Workers Planned: 4
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index 1e904a8c5b7..20688561614 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -452,6 +452,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
#14Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Noname (#13)
2 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hello Ekaterina,

seems like an interesting and useful improvement. I did a quick review
of the patch - attached is a 0002 patch with a couple minor changes (the
0001 is just your v1 patch, to keep cfbot happy).

1) There are a couple of changes to follow the project code style (e.g.
braces after "if" on a separate line, no braces around single-line
blocks, etc.). I've reverted some unnecessary whitespace changes. Minor stuff.

2) I don't think InstrEndLoop needs to check if min_t == 0 before
initializing it in the first loop. It certainly has to be 0, no? Same
for min_tuples. I also suggest adding a comment explaining that we don't
have to initialize the max values.

3) In ExplainNode, in the part processing per-worker stats, I think some
of the fields are incorrectly referencing planstate->instrument instead
of using the 'instrument' variable from WorkerInstrumentation.

In general, I agree with Andres that this might add overhead to EXPLAIN
ANALYZE, although I doubt it's going to be measurable. But maybe try
doing some measurements for the common and worst cases.

I wonder if we should have another EXPLAIN option enabling this.
I.e. by default we'd not collect/print this, and users would have to
pass some option to EXPLAIN. Or maybe we could tie this to VERBOSE?

Also, why print this only for nested loop, and not for all nodes with
(nloops > 1)? I see there was some discussion why checking nodeTag is
necessary to identify NL, but that's not my point.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

0001-extra_statistics_v1.patchtext/plain; charset=us-asciiDownload
From 903c203e7e5f298f927ade97ca03a0e129c31e75 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Sat, 31 Oct 2020 01:47:46 +0100
Subject: [PATCH 1/2] extra_statistics_v1

---
 src/backend/commands/explain.c                | 131 ++++++++++++++----
 src/backend/executor/instrument.c             |  32 ++++-
 src/include/executor/instrument.h             |   4 +
 src/test/regress/expected/partition_prune.out |  36 ++---
 src/test/regress/expected/select_parallel.out |  12 +-
 src/test/regress/sql/partition_prune.sql      |   2 +
 6 files changed, 163 insertions(+), 54 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 43f9b01e83..72dae57ee2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1568,29 +1568,64 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+			if (nodeTag(plan) == T_NestLoop) {
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 min_r, rows, max_r, nloops);
+			}
 			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)",
+									 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nodeTag(plan) == T_NestLoop) {
+				if (es->timing) {
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+					ExplainPropertyFloat("Min Time", "s", min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Max Time", "s", max_t_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+			else
 			{
-				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-									 3, es);
+				if (es->timing) {
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
 	else if (es->analyze)
@@ -1599,6 +1634,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1624,39 +1660,78 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		nloops = instrument->nloops;
 			double		startup_ms;
 			double		total_ms;
+			double		min_t_ms;
+			double		max_t_ms;
 			double		rows;
+			double		min_r;
+			double		max_r;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
+			min_t_ms = 1000.0 * planstate->instrument->min_t;
+			max_t_ms = 1000.0 * planstate->instrument->max_t;
 			rows = instrument->ntuples / nloops;
+			min_r = planstate->instrument->min_tuples;
+			max_r = planstate->instrument->max_tuples;
 
 			ExplainOpenWorker(n, es);
 
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
+				if (nodeTag(plan) == T_NestLoop) {
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 min_r, rows, max_r, nloops);
+				}
 				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
+				{
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, rows, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual rows=%.0f loops=%.0f\n",
+										 rows, nloops);
+				}
 			}
 			else
 			{
-				if (es->timing)
+				if (nodeTag(plan) == T_NestLoop) {
+					if (es->timing) {
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+						ExplainPropertyFloat("Min Time", "ms",
+											 min_t_ms, 3, es);
+						ExplainPropertyFloat("Max Time", "ms",
+											 max_t_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+				}
+				else
 				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
+					if (es->timing) {
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
 			ExplainCloseWorker(n, es);
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index fbedb5aaf6..b0748721c3 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -118,9 +118,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 
 /* Finish a run cycle for a plan node */
 void
-InstrEndLoop(Instrumentation *instr)
-{
-	double		totaltime;
+InstrEndLoop(Instrumentation *instr) {
+	double totaltime;
 
 	/* Skip if nothing has happened, or already shut down */
 	if (!instr->running)
@@ -134,7 +133,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+	if (instr->nloops == 0)					/* this is first loop */
+	{
+		if (instr->min_t == 0)
+		{
+			instr->min_t = totaltime;		/* init min_t */
+		}
+		if (instr->min_tuples == 0)
+		{
+			instr->min_tuples = instr->tuplecount;	/* init min_tuples */
+		}
+	}
+	if (instr->min_t > totaltime)
+	{
+		instr->min_t = totaltime;
+	}
+	if (instr->max_t < totaltime)
+	{
+		instr->max_t = totaltime;
+	}
 	instr->ntuples += instr->tuplecount;
+	if (instr->min_tuples > instr->tuplecount)
+	{
+		instr->min_tuples = instr->tuplecount;
+	}
+	if (instr->max_tuples < instr->tuplecount)
+	{
+		instr->max_tuples = instr->tuplecount;
+	}
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 9dc3ecb07d..dbfac349cc 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,11 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all loops */
+	double		max_tuples;		/* max counter of produced tuples for all loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 50d2a7e4b9..ba864a5767 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1929,6 +1929,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
@@ -2065,7 +2067,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2099,7 +2101,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{0,0,1}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2132,7 +2134,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,3}'::integer[]))
                      ->  Append (actual rows=N loops=N)
@@ -2164,7 +2166,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2198,7 +2200,7 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
          Workers Planned: 1
          Workers Launched: N
          ->  Partial Aggregate (actual rows=N loops=N)
-               ->  Nested Loop (actual rows=N loops=N)
+               ->  Nested Loop (actual min_rows=N rows=N max_rows=N loops=N)
                      ->  Parallel Seq Scan on lprt_a a (actual rows=N loops=N)
                            Filter: (a = ANY ('{1,0,0}'::integer[]))
                            Rows Removed by Filter: N
@@ -2441,7 +2443,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
    Update on ab_a1_b1 ab_a1_1
    Update on ab_a1_b2 ab_a1_2
    Update on ab_a1_b3 ab_a1_3
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2461,7 +2463,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Recheck Cond: (a = 1)
                      ->  Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2482,7 +2484,7 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Heap Blocks: exact=1
                      ->  Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1 loops=1)
                            Index Cond: (a = 1)
-   ->  Nested Loop (actual rows=0 loops=1)
+   ->  Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
          ->  Append (actual rows=1 loops=1)
                ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
                      Recheck Cond: (a = 1)
@@ -2523,7 +2525,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
    Update on ab_a1_b3 ab_a1_3
    InitPlan 1 (returns $0)
      ->  Result (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b1 ab_a1_1 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2533,7 +2535,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b2 ab_a1_2 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2543,7 +2545,7 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
                            Filter: (b = $0)
                      ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
                            Filter: (b = $0)
-   ->  Nested Loop (actual rows=1 loops=1)
+   ->  Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
          ->  Seq Scan on ab_a1_b3 ab_a1_3 (actual rows=1 loops=1)
          ->  Materialize (actual rows=1 loops=1)
                ->  Append (actual rows=1 loops=1)
@@ -2589,7 +2591,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=6 loops=1)
+ Nested Loop (actual min_rows=6 rows=6 max_rows=6 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=3 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=2)
@@ -2610,7 +2612,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=2 loops=1)
+ Nested Loop (actual min_rows=2 rows=2 max_rows=2 loops=1)
    ->  Seq Scan on tbl1 (actual rows=2 loops=1)
    ->  Append (actual rows=1 loops=2)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2655,7 +2657,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=23 loops=1)
+ Nested Loop (actual min_rows=23 rows=23 max_rows=23 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=5 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=5)
@@ -2676,7 +2678,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=3 loops=1)
+ Nested Loop (actual min_rows=3 rows=3 max_rows=3 loops=1)
    ->  Seq Scan on tbl1 (actual rows=5 loops=1)
    ->  Append (actual rows=1 loops=5)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2740,7 +2742,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 < tprt.col1;
                                 QUERY PLAN                                
 --------------------------------------------------------------------------
- Nested Loop (actual rows=1 loops=1)
+ Nested Loop (actual min_rows=1 rows=1 max_rows=1 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=1 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
@@ -2772,7 +2774,7 @@ explain (analyze, costs off, summary off, timing off)
 select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
                             QUERY PLAN                             
 -------------------------------------------------------------------
- Nested Loop (actual rows=0 loops=1)
+ Nested Loop (actual min_rows=0 rows=0 max_rows=0 loops=1)
    ->  Seq Scan on tbl1 (actual rows=1 loops=1)
    ->  Append (actual rows=0 loops=1)
          ->  Index Scan using tprt1_idx on tprt_1 (never executed)
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 9b0c418db7..e8ccd903b4 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -547,10 +547,10 @@ alter table tenk2 set (parallel_workers = 0);
 explain (analyze, timing off, summary off, costs off)
    select count(*) from tenk1, tenk2 where tenk1.hundred > 1
         and tenk2.thousand=0;
-                                QUERY PLAN                                
---------------------------------------------------------------------------
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
  Aggregate (actual rows=1 loops=1)
-   ->  Nested Loop (actual rows=98000 loops=1)
+   ->  Nested Loop (actual min_rows=98000 rows=98000 max_rows=98000 loops=1)
          ->  Seq Scan on tenk2 (actual rows=10 loops=1)
                Filter: (thousand = 0)
                Rows Removed by Filter: 9990
@@ -581,9 +581,9 @@ begin
 end;
 $$;
 select * from explain_parallel_sort_stats();
-                       explain_parallel_sort_stats                        
---------------------------------------------------------------------------
- Nested Loop Left Join (actual rows=30000 loops=1)
+                           explain_parallel_sort_stats                           
+---------------------------------------------------------------------------------
+ Nested Loop Left Join (actual min_rows=30000 rows=30000 max_rows=30000 loops=1)
    ->  Values Scan on "*VALUES*" (actual rows=3 loops=1)
    ->  Gather Merge (actual rows=10000 loops=3)
          Workers Planned: 4
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index 1e904a8c5b..2068856161 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -452,6 +452,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
-- 
2.26.2

0002-minor-tweaks.patchtext/plain; charset=us-asciiDownload
From 28f3bdd55cd8784d0af4d53a4e88845782e2eaa0 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Sat, 31 Oct 2020 02:07:20 +0100
Subject: [PATCH 2/2] minor tweaks

---
 src/backend/commands/explain.c    | 32 +++++++++++++++----------
 src/backend/executor/instrument.c | 39 ++++++++++++++++---------------
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 72dae57ee2..fa8deac55a 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1575,7 +1575,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (nodeTag(plan) == T_NestLoop) {
+			if (nodeTag(plan) == T_NestLoop)
+			{
 				if (es->timing)
 					appendStringInfo(es->str,
 									 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
@@ -1599,8 +1600,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		}
 		else
 		{
-			if (nodeTag(plan) == T_NestLoop) {
-				if (es->timing) {
+			if (nodeTag(plan) == T_NestLoop)
+			{
+				if (es->timing)
+				{
 					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
 										 3, es);
 					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
@@ -1617,7 +1620,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			}
 			else
 			{
-				if (es->timing) {
+				if (es->timing)
+				{
 					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
 										 3, es);
 					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
@@ -1670,18 +1674,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
-			min_t_ms = 1000.0 * planstate->instrument->min_t;
-			max_t_ms = 1000.0 * planstate->instrument->max_t;
+			min_t_ms = 1000.0 * instrument->min_t;
+			max_t_ms = 1000.0 * instrument->max_t;
 			rows = instrument->ntuples / nloops;
-			min_r = planstate->instrument->min_tuples;
-			max_r = planstate->instrument->max_tuples;
+			min_r = instrument->min_tuples;
+			max_r = instrument->max_tuples;
 
 			ExplainOpenWorker(n, es);
 
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (nodeTag(plan) == T_NestLoop) {
+				if (nodeTag(plan) == T_NestLoop)
+				{
 					if (es->timing)
 						appendStringInfo(es->str,
 										 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
@@ -1705,8 +1710,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			}
 			else
 			{
-				if (nodeTag(plan) == T_NestLoop) {
-					if (es->timing) {
+				if (nodeTag(plan) == T_NestLoop)
+				{
+					if (es->timing)
+					{
 						ExplainPropertyFloat("Actual Startup Time", "ms",
 											 startup_ms, 3, es);
 						ExplainPropertyFloat("Actual Total Time", "ms",
@@ -1723,7 +1730,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				}
 				else
 				{
-					if (es->timing) {
+					if (es->timing)
+					{
 						ExplainPropertyFloat("Actual Startup Time", "ms",
 											 startup_ms, 3, es);
 						ExplainPropertyFloat("Actual Total Time", "ms",
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index b0748721c3..0198bb3a5a 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -118,8 +118,9 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 
 /* Finish a run cycle for a plan node */
 void
-InstrEndLoop(Instrumentation *instr) {
-	double totaltime;
+InstrEndLoop(Instrumentation *instr)
+{
+	double		totaltime;
 
 	/* Skip if nothing has happened, or already shut down */
 	if (!instr->running)
@@ -133,34 +134,34 @@ InstrEndLoop(Instrumentation *instr) {
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
-	if (instr->nloops == 0)					/* this is first loop */
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with
+	 * the max, because those are 0 and the non-zero values will get
+	 * updated a couple lines later.
+	 */
+	if (instr->nloops == 0)
 	{
-		if (instr->min_t == 0)
-		{
-			instr->min_t = totaltime;		/* init min_t */
-		}
-		if (instr->min_tuples == 0)
-		{
-			instr->min_tuples = instr->tuplecount;	/* init min_tuples */
-		}
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
 	}
+
 	if (instr->min_t > totaltime)
-	{
 		instr->min_t = totaltime;
-	}
+
 	if (instr->max_t < totaltime)
-	{
 		instr->max_t = totaltime;
-	}
+
 	instr->ntuples += instr->tuplecount;
+
 	if (instr->min_tuples > instr->tuplecount)
-	{
 		instr->min_tuples = instr->tuplecount;
-	}
+
 	if (instr->max_tuples < instr->tuplecount)
-	{
 		instr->max_tuples = instr->tuplecount;
-	}
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
-- 
2.26.2

#15Georgios Kokolatos
gkokolatos@protonmail.com
In reply to: Tomas Vondra (#14)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi,

I noticed that this patch fails on the cfbot.
For this, I changed the status to: 'Waiting on Author'.

Cheers,
//Georgios

The new status of this patch is: Waiting on Author

#16Noname
e.sokolova@postgrespro.ru
In reply to: Tomas Vondra (#14)
2 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Tomas Vondra wrote on 2020-10-31 04:20:

seems like an interesting and useful improvement. I did a quick review
of the patch - attached is a 0002 patch with a couple minor changes
(the
0001 is just your v1 patch, to keep cfbot happy).

Thank you for your review and changes!

3) In ExplainNode, in the part processing per-worker stats, I think
some
of the fields are incorrectly referencing planstate->instrument instead
of using the 'instrument' variable from WorkerInstrumentation.

It's correct behavior, because struct WorkerInstrumentation contains a
struct Instrumentation named instrument. But planstate->instrument is a
struct Instrumentation too.

I wonder if we should have another EXPLAIN option enabling this.
I.e. by default we'd not collect/print this, and users would have to
pass some option to EXPLAIN. Or maybe we could tie this to VERBOSE?

It's a good idea. Now the additional statistics are only printed when
VERBOSE is set.

New version of this patch prints extra statistics for all cases of
multiple loops, not only for Nested Loop. Also I fixed the example by
adding VERBOSE.

Please don't hesitate to share any thoughts on this topic!
--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v2.patchtext/x-diff; name=extra_statistics_v2.patchDownload
From 148b0792b8eed1527ee3266e7e627b3c75effb9e Mon Sep 17 00:00:00 2001
From: Ekaterina Sokolova <e.sokolova@postgrespro.ru>
Date: Thu, 12 Nov 2020 22:06:41 +0300
Subject: [PATCH] extra_statistics_v2

---
 src/backend/commands/explain.c                | 141 ++++++++++++++----
 src/backend/executor/instrument.c             |  27 ++++
 src/include/executor/instrument.h             |   6 +
 src/test/regress/expected/explain.out         |   8 +
 src/test/regress/expected/partition_prune.out |   2 +
 src/test/regress/expected/timetz.out          |   8 +-
 src/test/regress/sql/partition_prune.sql      |   2 +
 7 files changed, 161 insertions(+), 33 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 41317f18374..b525b12b1d6 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1568,29 +1568,68 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+									 min_r, rows, max_r, nloops);
+			}
 			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)",
+									 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nloops > 1 && es->verbose)
+			{
+				if (es->timing)
+				{
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+					ExplainPropertyFloat("Min Time", "s", min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Max Time", "s", max_t_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+			else
 			{
-				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-									 3, es);
+				if (es->timing)
+				{
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
 	else if (es->analyze)
@@ -1599,6 +1638,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1624,39 +1664,82 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		nloops = instrument->nloops;
 			double		startup_ms;
 			double		total_ms;
+			double		min_t_ms;
+			double		max_t_ms;
 			double		rows;
+			double		min_r;
+			double		max_r;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
+			min_t_ms = 1000.0 * instrument->min_t;
+			max_t_ms = 1000.0 * instrument->max_t;
 			rows = instrument->ntuples / nloops;
+			min_r = instrument->min_tuples;
+			max_r = instrument->max_tuples;
 
 			ExplainOpenWorker(n, es);
 
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
+				if (nloops > 1)
+				{
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f min_time=%.3f  max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f\n",
+										 min_r, rows, max_r, nloops);
+				}
 				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
+				{
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, rows, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual rows=%.0f loops=%.0f\n",
+										 rows, nloops);
+				}
 			}
 			else
 			{
-				if (es->timing)
+				if (nloops > 1)
 				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
+					if (es->timing)
+					{
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+						ExplainPropertyFloat("Min Time", "ms",
+											 min_t_ms, 3, es);
+						ExplainPropertyFloat("Max Time", "ms",
+											 max_t_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+				}
+				else
+				{
+					if (es->timing)
+					{
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
 			ExplainCloseWorker(n, es);
@@ -3094,7 +3177,7 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
 			if (aggstate->hash_batches_used > 1)
 			{
 				appendStringInfo(es->str, "  Disk Usage: " UINT64_FORMAT "kB",
-					aggstate->hash_disk_used);
+								 aggstate->hash_disk_used);
 			}
 		}
 
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index fbedb5aaf60..db1be682e1c 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,7 +134,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 9dc3ecb07d7..a458b566e0d 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index dc7ab2ce8bf..aa3955ad1fc 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -341,6 +341,10 @@ select jsonb_pretty(
                                  "string4"                  +
                              ],                             +
                              "Schema": "public",            +
+                             "Max Rows": 0,                 +
+                             "Max Time": 0.0,               +
+                             "Min Rows": 0,                 +
+                             "Min Time": 0.0,               +
                              "Node Type": "Seq Scan",       +
                              "Plan Rows": 0,                +
                              "Plan Width": 0,               +
@@ -383,6 +387,10 @@ select jsonb_pretty(
                          "stringu2",                        +
                          "string4"                          +
                      ],                                     +
+                     "Max Rows": 0,                         +
+                     "Max Time": 0.0,                       +
+                     "Min Rows": 0,                         +
+                     "Min Time": 0.0,                       +
                      "Sort Key": [                          +
                          "tenk1.tenthous"                   +
                      ],                                     +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 50d2a7e4b97..2d8ad48ff09 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1929,6 +1929,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
diff --git a/src/test/regress/expected/timetz.out b/src/test/regress/expected/timetz.out
index 038bb5fa094..5294179aa45 100644

diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index 1e904a8c5b7..20688561614 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -452,6 +452,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'actual min_rows=\d+ rows=\d+ max_rows=\d+ loops=\d+',
+				 'actual min_rows=N rows=N max_rows=N loops=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
-- 
2.26.0

example_v1.sqltext/plain; name=example_v1.sqlDownload
#17Michael Christofides
michael@pgmustard.com
In reply to: David G. Johnston (#9)
Re: [PATCH] Add extra statistics to explain for Nested Loop

New version of this patch prints extra statistics for all cases of
multiple loops, not only for Nested Loop. Also I fixed the example by
adding VERBOSE.

Please don't hesitate to share any thoughts on this topic!

Thanks a lot for working on this! I really like the extra details, and
including it only with VERBOSE sounds good.

rows * loops is still an important calculation.

Why not just add total_rows while we are at it - last in the listing?

(actual rows=N loops=N min_rows=N max_rows=N total_rows=N)

This total_rows idea from David would really help us too, especially
in the cases where the actual rows is rounded down to zero. We make an
explain visualisation tool, and it'd be nice to show people a better
total than loops * actual rows. It would also help the accuracy of
some of our tips, that use this number.

Apologies if this input is too late to be helpful.

Cheers,
Michael

#18Yugo NAGATA
nagata@sraoss.co.jp
In reply to: Noname (#16)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hello,

On Thu, 12 Nov 2020 23:10:05 +0300
e.sokolova@postgrespro.ru wrote:

New version of this patch prints extra statistics for all cases of
multiple loops, not only for Nested Loop. Also I fixed the example by
adding VERBOSE.

I think these extra statistics are good because they help a DBA
understand the explained plan. I reviewed this patch. Here are a few
comments:

1)
postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..2752.00 rows=991 width=8) (actual time=0.021..17.651 rows=991 loops=1)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 99009
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4) (actual time=0.009..0.023 rows=100 loops=1)
Output: b.j
-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 min_time=0.065 max_time=0.163 min_rows=1000 rows=1000 max_rows=1000 loops=100)
Output: a.i
Planning Time: 0.066 ms
Execution Time: 17.719 ms
(10 rows)

I don't like this format where the extra statistics appear on the same
line as the existing information, because the output format differs depending
on whether the plan node's loops > 1 or not. This makes the length of a
line too long. Also, other information reported by VERBOSE doesn't change
the existing row format and just adds extra rows for new information.

Instead, it seems good to me to add extra rows for the new statistics
without changing the existing row format, as with other VERBOSE information,
like below.

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Min Time: 0.065 ms
Max Time: 0.163 ms
Min Rows: 1000
Max Rows: 1000

or, like Buffers,

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Loops: min_time=0.065 max_time=0.163 min_rows=1000 max_rows=1000

and so on. What do you think about it?

2)
In parallel scan, the extra statistics are not reported correctly.

postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather (cost=1000.00..2463.52 rows=991 width=8) (actual time=0.823..25.651 rows=991 loops=1)
Output: a.i, b.j
Workers Planned: 2
Workers Launched: 2
-> Nested Loop (cost=0.00..1364.42 rows=413 width=8) (actual time=9.426..16.723 min_time=0.000 max_time=22.017 min_rows=0 rows=330 max_rows=991 loops=3)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 33003
Worker 0: actual time=14.689..14.692 rows=0 loops=1
Worker 1: actual time=13.458..13.460 rows=0 loops=1
-> Parallel Seq Scan on public.a (cost=0.00..9.17 rows=417 width=4) (actual time=0.049..0.152 min_time=0.000 max_time=0.202 min_rows=0 rows=333 max_rows=452 loops=3)
Output: a.i
Worker 0: actual time=0.040..0.130 rows=322 loops=1
Worker 1: actual time=0.039..0.125 rows=226 loops=1
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4) (actual time=0.006..0.026 min_time=0.012 max_time=0.066 min_rows=100 rows=100 max_rows=100 loops=1000)
Output: b.j
Worker 0: actual time=0.006..0.024 min_time=0.000 max_time=0.000 min_rows=0 rows=100 max_rows=0 loops=322
Worker 1: actual time=0.008..0.030 min_time=0.000 max_time=0.000 min_rows=0 rows=100 max_rows=0 loops=226
Planning Time: 0.186 ms
Execution Time: 25.838 ms
(20 rows)

This reports the max/min rows or time of the inner scan as 0 in parallel workers,
and as a result only the leader process's values are accounted for. To fix this,
we could change InstrAggNode as below.

@@ -167,6 +196,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
        dst->nloops += add->nloops;
        dst->nfiltered1 += add->nfiltered1;
        dst->nfiltered2 += add->nfiltered2;
+       dst->min_t = Min(dst->min_t, add->min_t);
+       dst->max_t = Max(dst->max_t, add->max_t);
+       dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+       dst->max_tuples = Max(dst->max_tuples, add->max_tuples);

3)
There are garbage lines and I could not apply this patch.

diff --git a/src/test/regress/expected/timetz.out b/src/test/regress/expected/timetz.out
index 038bb5fa094..5294179aa45 100644

Regards,
Yugo Nagata

--
Yugo NAGATA <nagata@sraoss.co.jp>

#19Julien Rouhaud
rjuju123@gmail.com
In reply to: Yugo NAGATA (#18)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Thu, Jan 28, 2021 at 8:38 PM Yugo NAGATA <nagata@sraoss.co.jp> wrote:

postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..2752.00 rows=991 width=8) (actual time=0.021..17.651 rows=991 loops=1)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 99009
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4) (actual time=0.009..0.023 rows=100 loops=1)
Output: b.j
-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 min_time=0.065 max_time=0.163 min_rows=1000 rows=1000 max_rows=1000 loops=100)
Output: a.i
Planning Time: 0.066 ms
Execution Time: 17.719 ms
(10 rows)

I don't like this format where the extra statistics appear on the same
line as the existing information, because the output format differs depending
on whether the plan node's loops > 1 or not. This makes the length of a
line too long. Also, other information reported by VERBOSE doesn't change
the existing row format and just adds extra rows for new information.

Instead, it seems good to me to add extra rows for the new statistics
without changing the existing row format, as with other VERBOSE information,
like below.

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Min Time: 0.065 ms
Max Time: 0.163 ms
Min Rows: 1000
Max Rows: 1000

or, like Buffers,

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Loops: min_time=0.065 max_time=0.163 min_rows=1000 max_rows=1000

and so on. What do you think about it?

It's true that the current output is a bit long, which isn't really
convenient to read. Using one of those alternative formats would also
have the advantage of not breaking compatibility with tools that
process those entries. I personally prefer the 2nd option with the
extra "Loops:" line. For non-text formats, should we keep the current
format?

2)
In parallel scan, the extra statistics are not reported correctly.

postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather (cost=1000.00..2463.52 rows=991 width=8) (actual time=0.823..25.651 rows=991 loops=1)
Output: a.i, b.j
Workers Planned: 2
Workers Launched: 2
-> Nested Loop (cost=0.00..1364.42 rows=413 width=8) (actual time=9.426..16.723 min_time=0.000 max_time=22.017 min_rows=0 rows=330 max_rows=991 loops=3)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 33003
Worker 0: actual time=14.689..14.692 rows=0 loops=1
Worker 1: actual time=13.458..13.460 rows=0 loops=1
-> Parallel Seq Scan on public.a (cost=0.00..9.17 rows=417 width=4) (actual time=0.049..0.152 min_time=0.000 max_time=0.202 min_rows=0 rows=333 max_rows=452 loops=3)
Output: a.i
Worker 0: actual time=0.040..0.130 rows=322 loops=1
Worker 1: actual time=0.039..0.125 rows=226 loops=1
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4) (actual time=0.006..0.026 min_time=0.012 max_time=0.066 min_rows=100 rows=100 max_rows=100 loops=1000)
Output: b.j
Worker 0: actual time=0.006..0.024 min_time=0.000 max_time=0.000 min_rows=0 rows=100 max_rows=0 loops=322
Worker 1: actual time=0.008..0.030 min_time=0.000 max_time=0.000 min_rows=0 rows=100 max_rows=0 loops=226
Planning Time: 0.186 ms
Execution Time: 25.838 ms
(20 rows)

This reports the max/min rows or time of the inner scan as 0 in parallel workers,
and as a result only the leader process's values are accounted for. To fix this,
we could change InstrAggNode as below.

@@ -167,6 +196,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
dst->nloops += add->nloops;
dst->nfiltered1 += add->nfiltered1;
dst->nfiltered2 += add->nfiltered2;
+       dst->min_t = Min(dst->min_t, add->min_t);
+       dst->max_t = Max(dst->max_t, add->max_t);
+       dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+       dst->max_tuples = Max(dst->max_tuples, add->max_tuples);

Agreed.

3)
There are garbage lines and I could not apply this patch.

diff --git a/src/test/regress/expected/timetz.out b/src/test/regress/expected/timetz.out
index 038bb5fa094..5294179aa45 100644

I also switch the patch to "waiting on author" on the commit fest app.

#20Yugo NAGATA
nagata@sraoss.co.jp
In reply to: Julien Rouhaud (#19)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Mon, 1 Feb 2021 13:28:45 +0800
Julien Rouhaud <rjuju123@gmail.com> wrote:

On Thu, Jan 28, 2021 at 8:38 PM Yugo NAGATA <nagata@sraoss.co.jp> wrote:

postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..2752.00 rows=991 width=8) (actual time=0.021..17.651 rows=991 loops=1)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 99009
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4) (actual time=0.009..0.023 rows=100 loops=1)
Output: b.j
-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 min_time=0.065 max_time=0.163 min_rows=1000 rows=1000 max_rows=1000 loops=100)
Output: a.i
Planning Time: 0.066 ms
Execution Time: 17.719 ms
(10 rows)

I don't like this format, where the extra statistics appear on the same
line as the existing information, because the output format differs depending
on whether the plan node's loops > 1 or not. This also makes the
line too long. In addition, other information reported by VERBOSE doesn't change
the existing row format and just adds extra rows for new information.

Instead, it seems good to me to add extra rows for the new statistics
without changing the existing row format, as is done for other VERBOSE
information, like below.

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Min Time: 0.065 ms
Max Time: 0.163 ms
Min Rows: 1000
Max Rows: 1000

or, like Buffers,

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4) (actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Loops: min_time=0.065 max_time=0.163 min_rows=1000 max_rows=1000

and so on. What do you think about it?

It's true that the current output is a bit long, which isn't really
convenient to read. Using one of those alternative formats would also
have the advantage of not breaking compatibility with tools that
process those entries. I personally prefer the 2nd option with the
extra "Loops:" line. For non-text formats, should we keep the current
format?

For non-text formats, I think "Max/Min Rows" and "Max/Min Times" are a bit
too terse and their meaning is unclear. Instead, similar to the style of "Buffers",
would it make sense to use "Max/Min Rows in Loops" and "Max/Min Times in Loops"?

Regards,
Yugo Nagata

--
Yugo NAGATA <nagata@sraoss.co.jp>

#21Noname
e.sokolova@postgrespro.ru
In reply to: Julien Rouhaud (#19)
1 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Thank you all for your feedback and suggestions.
I attach a new version of the patch with some changes and fixes.
Here's a list of the major changes:
1) New format of extra statistics. These are now printed on a line
separate from the main statistics.

Julien Rouhaud писал 2021-02-01 08:28:

On Thu, Jan 28, 2021 at 8:38 PM Yugo NAGATA <nagata@sraoss.co.jp>
wrote:

postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;

QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..2752.00 rows=991 width=8) (actual
time=0.021..17.651 rows=991 loops=1)
Output: a.i, b.j
Join Filter: (a.i = b.j)
Rows Removed by Join Filter: 99009
-> Seq Scan on public.b (cost=0.00..2.00 rows=100 width=4)
(actual time=0.009..0.023 rows=100 loops=1)
Output: b.j
-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4)
(actual time=0.005..0.091 min_time=0.065 max_time=0.163 min_rows=1000
rows=1000 max_rows=1000 loops=100)
Output: a.i
Planning Time: 0.066 ms
Execution Time: 17.719 ms
(10 rows)

I don't like this format, where the extra statistics appear on the same
line as the existing information, because the output format differs
depending on whether the plan node's loops > 1 or not. This also makes
the line too long. In addition, other information reported by VERBOSE
doesn't change the existing row format and just adds extra rows for new
information.

Instead, it seems good to me to add extra rows for the new statistics
without changing the existing row format, as is done for other VERBOSE
information, like below.

-> Seq Scan on public.a (cost=0.00..15.00 rows=1000 width=4)
(actual time=0.005..0.091 rows=1000 loops=100)
Output: a.i
Loops: min_time=0.065 max_time=0.163 min_rows=1000
max_rows=1000

and so on. What do you think about it?

2) Correction of the case of parallel scan

In parallel scan, the extra statistics are not reported correctly.

This reports the max/min rows and times of the inner scan as 0 in the
parallel workers, so only the leader process's values are accounted for.
To fix this, we would change InstrAggNode as below.

3) Added an extra statistic for the total number of rows (total rows).
Several people in this thread asked for this.

Please don't hesitate to share any thoughts on this topic.

--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v3.patchtext/x-diff; charset=us-ascii; name=extra_statistics_v3.patchDownload
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index afc45429ba4..723eccca013 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1589,29 +1589,82 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		total_rows = planstate->instrument->ntuples;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+				if (es->timing)
+				{
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)\n",
+									 startup_ms, total_ms, rows, nloops);
+					ExplainIndentText(es);
+					appendStringInfo(es->str,
+									 "Loop: min_time=%.3f max_time=%.3f min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+									 min_t_ms, max_t_ms, min_r, max_r, total_rows);
+				}
+				else
+				{
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)\n",
+									 rows, nloops);
+					ExplainIndentText(es);
+					appendStringInfo(es->str,
+									 "Loop: min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+									 min_r, max_r, total_rows);
+				}
+			}
 			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)",
+									 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nloops > 1 && es->verbose)
+			{
+				if (es->timing)
+				{
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+					ExplainPropertyFloat("Min Time", "s", min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Max Time", "s", max_t_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Min Rows", NULL, min_r, 0, es);
+				ExplainPropertyFloat("Max Rows", NULL, max_r, 0, es);
+				ExplainPropertyFloat("Total Rows", NULL, total_rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+			else
 			{
-				ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-									 3, es);
+				if (es->timing)
+				{
+					ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+										 3, es);
+					ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+										 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
 	else if (es->analyze)
@@ -1620,6 +1673,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1646,38 +1700,96 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		startup_ms;
 			double		total_ms;
 			double		rows;
+			double		min_t_ms;
+			double		max_t_ms;
+			double		min_r;
+			double		max_r;
+			double		total_rows;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
 			rows = instrument->ntuples / nloops;
+			min_t_ms = 1000.0 * instrument->min_t;
+			max_t_ms = 1000.0 * instrument->max_t;
+			min_r = instrument->min_tuples;
+			max_r = instrument->max_tuples;
+			total_rows = instrument->ntuples;
 
 			ExplainOpenWorker(n, es);
 
 			if (es->format == EXPLAIN_FORMAT_TEXT)
 			{
 				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
+				if (nloops > 1)
+				{
+					if (es->timing)
+					{
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, rows, nloops);
+						ExplainIndentText(es);
+						appendStringInfo(es->str,
+										 "Loop: min_time=%.3f  max_time=%.3f min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+										 min_t_ms, max_t_ms, min_r, max_r, total_rows);
+					}
+					else
+					{
+						appendStringInfo(es->str,
+										 "actual rows=%.0f loops=%.0f\n",
+										 rows, nloops);
+						ExplainIndentText(es);
+						appendStringInfo(es->str,
+										 "Loop: min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+										 min_r, max_r, total_rows);
+					}
+				}
 				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
+				{
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+										 startup_ms, total_ms, rows, nloops);
+					else
+						appendStringInfo(es->str,
+										 "actual rows=%.0f loops=%.0f\n",
+										 rows, nloops);
+				}
 			}
 			else
 			{
-				if (es->timing)
+				if (nloops > 1)
 				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
+					if (es->timing)
+					{
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+						ExplainPropertyFloat("Min Time", "ms",
+											 min_t_ms, 3, es);
+						ExplainPropertyFloat("Max Time", "ms",
+											 max_t_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Min Rows", NULL, min_r, 0, es);
+					ExplainPropertyFloat("Max Rows", NULL, max_r, 0, es);
+					ExplainPropertyFloat("Total Rows", NULL, total_rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+				}
+				else
+				{
+					if (es->timing)
+					{
+						ExplainPropertyFloat("Actual Startup Time", "ms",
+											 startup_ms, 3, es);
+						ExplainPropertyFloat("Actual Total Time", "ms",
+											 total_ms, 3, es);
+					}
+					ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+					ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
 			ExplainCloseWorker(n, es);
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 237e13361b5..d2c3e678882 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,7 +134,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -167,6 +194,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index aa8eceda5f4..93ba7c83461 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index dc7ab2ce8bf..0569662ca50 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -341,10 +341,15 @@ select jsonb_pretty(
                                  "string4"                  +
                              ],                             +
                              "Schema": "public",            +
+                             "Max Rows": 0,                 +
+                             "Max Time": 0.0,               +
+                             "Min Rows": 0,                 +
+                             "Min Time": 0.0,               +
                              "Node Type": "Seq Scan",       +
                              "Plan Rows": 0,                +
                              "Plan Width": 0,               +
                              "Total Cost": 0.0,             +
+                             "Total Rows": 0,               +
                              "Actual Rows": 0,              +
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
@@ -383,6 +388,10 @@ select jsonb_pretty(
                          "stringu2",                        +
                          "string4"                          +
                      ],                                     +
+                     "Max Rows": 0,                         +
+                     "Max Time": 0.0,                       +
+                     "Min Rows": 0,                         +
+                     "Min Time": 0.0,                       +
                      "Sort Key": [                          +
                          "tenk1.tenthous"                   +
                      ],                                     +
@@ -390,6 +399,7 @@ select jsonb_pretty(
                      "Plan Rows": 0,                        +
                      "Plan Width": 0,                       +
                      "Total Cost": 0.0,                     +
+                     "Total Rows": 0,                       +
                      "Actual Rows": 0,                      +
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index bde29e38a94..bb979432405 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1957,6 +1957,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop: min_rows=\d+ max_rows=\d+ total_rows=\d+',
+				 'Loop: min_rows=N max_rows=N total_rows=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
@@ -2761,6 +2763,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop: min_rows=2 max_rows=6 total_rows=23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop: min_rows=2 max_rows=2 total_rows=10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop: min_rows=2 max_rows=3 total_rows=11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop: min_rows=1 max_rows=1 total_rows=2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop: min_rows=0 max_rows=1 total_rows=3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop: min_rows=1 max_rows=1 total_rows=2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop: min_rows=0 max_rows=1 total_rows=1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index 6ccb52ad1d6..6224e3a4964 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -463,6 +463,8 @@ begin
     loop
         ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
         ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop: min_rows=\d+ max_rows=\d+ total_rows=\d+',
+				 'Loop: min_rows=N max_rows=N total_rows=N');
         ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
         return next ln;
     end loop;
@@ -652,6 +654,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
#22Justin Pryzby
pryzby@telsasoft.com
In reply to: Noname (#21)
Re: [PATCH] Add extra statistics to explain for Nested Loop
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index afc45429ba4..723eccca013 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1589,29 +1589,82 @@ ExplainNode(PlanState *planstate, List *ancestors,
double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
double		total_ms = 1000.0 * planstate->instrument->total / nloops;
double		rows = planstate->instrument->ntuples / nloops;
+		double		total_rows = planstate->instrument->ntuples;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
if (es->format == EXPLAIN_FORMAT_TEXT)
{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+				if (es->timing)
+				{
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)\n",
+									 startup_ms, total_ms, rows, nloops);
+					ExplainIndentText(es);
+					appendStringInfo(es->str,
+									 "Loop: min_time=%.3f max_time=%.3f min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+									 min_t_ms, max_t_ms, min_r, max_r, total_rows);

Lines with "colon" format shouldn't use equal signs, and should use two spaces
between fields. See:
/messages/by-id/20200619022001.GY17995@telsasoft.com
/messages/by-id/20200402054120.GC14618@telsasoft.com
/messages/by-id/20200407042521.GH2228@telsasoft.com

+				}
+				else
+				{
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)\n",
+									 rows, nloops);
+					ExplainIndentText(es);
+					appendStringInfo(es->str,
+									 "Loop: min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+									 min_r, max_r, total_rows);
+				}
+			}
else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
+			{
+				if (es->timing)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual rows=%.0f loops=%.0f)",
+									 rows, nloops);
+			}
}
else

Since this is now on a separate line, the "if (nloops > 1 && es->verbose)"
can be after the existing "if (es->timing)", and shouldn't need its own
"if (es->timing)". It should conditionally add a separate line, rather than
duplicating the "(actual.*" line.

-			if (es->timing)
+			if (nloops > 1 && es->verbose)

In non-text mode, I think you should not check "nloops > 1". Rather, print the
field as 0.

The whole logic is duplicated for parallel workers. This could instead be a
function, called from both places. I think this would handle the computation
as well as the output. This would make the patch shorter.

+						ExplainPropertyFloat("Min Time", "ms",
+											 min_t_ms, 3, es);
+						ExplainPropertyFloat("Max Time", "ms",
+											 max_t_ms, 3, es);

I think the labels in non-text format should say "Loop Min Time" or similar.

diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index aa8eceda5f4..93ba7c83461 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,13 @@ typedef struct Instrumentation
/* Accumulated statistics across all completed cycles: */
double		startup;		/* total startup time (in seconds) */
double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */

And should these variables have a loop_ prefix, like loop_min_t?

--
Justin

#23Noname
e.sokolova@postgrespro.ru
In reply to: Justin Pryzby (#22)
1 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Thank you for working on this issue. Your comments helped me make this
patch more correct.

Lines with "colon" format shouldn't use equal signs, and should use two
spaces
between fields.

Done. Now the extra line looks like "Loop min_rows: %.0f  max_rows: %.0f
total_rows: %.0f" or "Loop min_time: %.3f  max_time: %.3f  min_rows:
%.0f  max_rows: %.0f  total_rows: %.0f" (two spaces between fields).

Since this is now on a separate line, the "if (nloops > 1 &&
es->verbose)"
can be after the existing "if (es->timing)", and shouldn't need its own
"if (es->timing)". It should conditionally add a separate line, rather
than
duplicating the "(actual.*" line.

-			if (es->timing)
+			if (nloops > 1 && es->verbose)

New version of patch contains this correction. It helped make the patch
shorter.

In non-text mode, I think you should not check "nloops > 1". Rather,
print the
field as 0.

The fields will not be zeros. The new line will almost repeat the line with
the main statistics.

I think the labels in non-text format should say "Loop Min Time" or
similar.

And these variables should have a loop_ prefix like loop_min_t ?

These are good ideas. I changed it.

I attach a new version of this patch. I hope it is better now.
Please don't hesitate to share any thoughts on this topic.

--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v4.patchtext/x-diff; name=extra_statistics_v4.patchDownload
From: "Ekaterina Sokolova" <e.sokolova@postgrespro.ru>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b62a76e7e5a..bf8c37baefd 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1615,6 +1615,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		loop_total_rows = planstate->instrument->ntuples;
+		double		loop_min_r = planstate->instrument->min_tuples;
+		double		loop_max_r = planstate->instrument->max_tuples;
+		double		loop_min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		loop_max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
@@ -1626,6 +1631,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				appendStringInfo(es->str,
 								 " (actual rows=%.0f loops=%.0f)",
 								 rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+                appendStringInfo(es->str, "\n");
+				ExplainIndentText(es);
+				if (es->timing)
+					appendStringInfo(es->str,
+									 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+									 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+				else
+					appendStringInfo(es->str,
+									 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+									 loop_min_r, loop_max_r, loop_total_rows);
+			}
 		}
 		else
 		{
@@ -1635,8 +1653,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 									 3, es);
 				ExplainPropertyFloat("Actual Total Time", "s", total_ms,
 									 3, es);
+				if (nloops > 1 && es->verbose)
+				{
+					ExplainPropertyFloat("Loop Min Time", "s", loop_min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Loop Max Time", "s", loop_max_t_ms,
+										 3, es);
+				}
 			}
 			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+			if (nloops > 1 && es->verbose)
+			{
+				ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+				ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+				ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+			}
 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
@@ -1646,6 +1677,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1672,12 +1704,22 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		startup_ms;
 			double		total_ms;
 			double		rows;
+			double		loop_min_t_ms;
+			double		loop_max_t_ms;
+			double		loop_min_r;
+			double		loop_max_r;
+			double		loop_total_rows;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
 			rows = instrument->ntuples / nloops;
+			loop_min_t_ms = 1000.0 * instrument->min_t;
+			loop_max_t_ms = 1000.0 * instrument->max_t;
+			loop_min_r = instrument->min_tuples;
+			loop_max_r = instrument->max_tuples;
+			loop_total_rows = instrument->ntuples;
 
 			ExplainOpenWorker(n, es);
 
@@ -1692,6 +1734,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 					appendStringInfo(es->str,
 									 "actual rows=%.0f loops=%.0f\n",
 									 rows, nloops);
+				if (nloops > 1)
+				{
+                    appendStringInfo(es->str, "\n");
+					ExplainIndentText(es);
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+										 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+					else
+						appendStringInfo(es->str,
+										 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+										 loop_min_r, loop_max_r, loop_total_rows);
+				}
 			}
 			else
 			{
@@ -1701,8 +1756,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										 startup_ms, 3, es);
 					ExplainPropertyFloat("Actual Total Time", "ms",
 										 total_ms, 3, es);
+					if (nloops > 1)
+					{
+						ExplainPropertyFloat("Loop Min Time", "ms",
+											 loop_min_t_ms, 3, es);
+						ExplainPropertyFloat("Loop Max Time", "ms",
+											 loop_max_t_ms, 3, es);
+					}
 				}
 				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				if (nloops > 1)
+				{
+					ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+					ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+					ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+				}
 				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 237e13361b5..d2c3e678882 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -134,7 +134,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -167,6 +194,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index aa8eceda5f4..93ba7c83461 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -66,7 +66,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index cda28098baa..b51659d57f3 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -351,8 +351,13 @@ select jsonb_pretty(
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
                              "Async Capable": false,        +
+                             "Loop Max Rows": 0,            +
+                             "Loop Max Time": 0.0,          +
+                             "Loop Min Rows": 0,            +
+                             "Loop Min Time": 0.0,          +
                              "Relation Name": "tenk1",      +
                              "Parallel Aware": true,        +
+                             "Loop Total Rows": 0,          +
                              "Local Hit Blocks": 0,         +
                              "Temp Read Blocks": 0,         +
                              "Actual Total Time": 0.0,      +
@@ -397,7 +402,12 @@ select jsonb_pretty(
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
                      "Async Capable": false,                +
+                     "Loop Max Rows": 0,                    +
+                     "Loop Max Time": 0.0,                  +
+                     "Loop Min Rows": 0,                    +
+                     "Loop Min Time": 0.0,                  +
                      "Parallel Aware": false,               +
+                     "Loop Total Rows": 0,                  +
                      "Sort Space Used": 0,                  +
                      "Local Hit Blocks": 0,                 +
                      "Temp Read Blocks": 0,                 +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 1a7149bfd57..8d87af92593 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1961,6 +1961,10 @@ begin
         ln := regexp_replace(ln, 'Hits: \d+', 'Hits: N');
         ln := regexp_replace(ln, 'Misses: \d+', 'Misses: N');
         ln := regexp_replace(ln, 'Memory Usage: \d+', 'Memory Usage: N');
+        ln := regexp_replace(ln, 'Loop min_time: \d+  max_time: \d+  min_rows: \d+  max_rows: \d+  total_rows: \d+',
+                                 'Loop min_time: N  max_time: N  min_rows: N  max_rows: N  total_rows: N');
+        ln := regexp_replace(ln, 'Loop min_rows: \d+  max_rows: \d+  total_rows: \d+',
+                                 'Loop min_rows: N  max_rows: N  total_rows: N');
         return next ln;
     end loop;
 end;
@@ -2718,6 +2722,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop min_rows: 2  max_rows: 6  total_rows: 23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop min_rows: 2  max_rows: 2  total_rows: 10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop min_rows: 2  max_rows: 3  total_rows: 11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop min_rows: 0  max_rows: 1  total_rows: 3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop min_rows: 0  max_rows: 1  total_rows: 1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index 247264f93b7..c1268e23f27 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -467,6 +467,10 @@ begin
         ln := regexp_replace(ln, 'Hits: \d+', 'Hits: N');
         ln := regexp_replace(ln, 'Misses: \d+', 'Misses: N');
         ln := regexp_replace(ln, 'Memory Usage: \d+', 'Memory Usage: N');
+        ln := regexp_replace(ln, 'Loop min_time: \d+  max_time: \d+  min_rows: \d+  max_rows: \d+  total_rows: \d+',
+                                 'Loop min_time: N  max_time: N  min_rows: N  max_rows: N  total_rows: N');
+        ln := regexp_replace(ln, 'Loop min_rows: \d+  max_rows: \d+  total_rows: \d+',
+                                 'Loop min_rows: N  max_rows: N  total_rows: N');
         return next ln;
     end loop;
 end;
@@ -657,6 +661,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
#24vignesh C
vignesh21@gmail.com
In reply to: Noname (#23)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Wed, Apr 14, 2021 at 4:57 PM <e.sokolova@postgrespro.ru> wrote:

Thank you for working on this issue. Your comments helped me make this
patch more correct.

Lines with "colon" format shouldn't use equal signs, and should use two
spaces
between fields.

Done. Now extra line looks like "Loop min_rows: %.0f max_rows: %.0f
total_rows: %.0f" or "Loop min_time: %.3f max_time: %.3f min_rows:
%.0f max_rows: %.0f total_rows: %.0f".

Since this is now on a separate line, the "if (nloops > 1 &&
es->verbose)"
can be after the existing "if (es->timing)", and shouldn't need its own
"if (es->timing)". It should conditionally add a separate line, rather
than
duplicating the "(actual.*" line.

-                    if (es->timing)
+                    if (nloops > 1 && es->verbose)

New version of patch contains this correction. It helped make the patch
shorter.

In non-text mode, I think you should not check "nloops > 1". Rather,
print the
field as 0.

The fields will not be zeros. The new line will almost repeat the line with
the main statistics.

I think the labels in non-text format should say "Loop Min Time" or
similar.

And these variables should have a loop_ prefix like loop_min_t ?

These are good ideas. I changed it.

I attach a new version of this patch. I hope it got better.
Please don't hesitate to share any thoughts on this topic.

The patch does not apply on Head, I'm changing the status to "Waiting
for Author":
1 out of 2 hunks FAILED -- saving rejects to file
src/test/regress/expected/partition_prune.out.rej
patching file src/test/regress/sql/partition_prune.sql
Hunk #1 FAILED at 467.
Hunk #2 succeeded at 654 (offset -3 lines).
1 out of 2 hunks FAILED -- saving rejects to file
src/test/regress/sql/partition_prune.sql.rej

Please post a new patch rebased on head.

Regards,
Vignesh

#25Ekaterina Sokolova
e.sokolova@postgrespro.ru
In reply to: vignesh C (#24)
1 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi, hackers.

Here is the new version of the patch that adds printing of min, max and total
statistics for time and rows across all loops to EXPLAIN ANALYSE.

1) Please add VERBOSE to display extra statistics.
2) Format of extra statistics is:

a) FORMAT TEXT

Loop min_time: N max_time: N min_rows: N max_rows: N total_rows: N
Output: ...

b) FORMAT JSON

...
"Actual Total Time": N,
"Loop Min Time": N,
"Loop Max Time": N,
"Actual Rows": N,
"Loop Min Rows": N,
"Loop Max Rows": N,
"Loop Total Rows": N,
"Actual Loops": N,
...

I hope you find this patch useful.
Please don't hesitate to share any thoughts on this topic!
--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v5.patchtext/x-diff; name=extra_statistics_v5.patchDownload
Author: Ekaterina Sokolova <e.sokolova@postgrespro.ru>

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 10644dfac44..9c71819deb2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1611,6 +1611,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		loop_total_rows = planstate->instrument->ntuples;
+		double		loop_min_r = planstate->instrument->min_tuples;
+		double		loop_max_r = planstate->instrument->max_tuples;
+		double		loop_min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		loop_max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
@@ -1622,6 +1627,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				appendStringInfo(es->str,
 								 " (actual rows=%.0f loops=%.0f)",
 								 rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+				appendStringInfo(es->str, "\n");
+				ExplainIndentText(es);
+				if (es->timing)
+					appendStringInfo(es->str,
+									 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+									 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+				else
+					appendStringInfo(es->str,
+									 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+									 loop_min_r, loop_max_r, loop_total_rows);
+			}
 		}
 		else
 		{
@@ -1631,8 +1649,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 									 3, es);
 				ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
 									 3, es);
+				if (nloops > 1 && es->verbose)
+				{
+					ExplainPropertyFloat("Loop Min Time", "s", loop_min_t_ms,
+										 3, es);
+					ExplainPropertyFloat("Loop Max Time", "s", loop_max_t_ms,
+										 3, es);
+				}
 			}
 			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+			if (nloops > 1 && es->verbose)
+			{
+				ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+				ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+				ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+			}
 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
@@ -1642,6 +1673,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1668,12 +1700,22 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		startup_ms;
 			double		total_ms;
 			double		rows;
+			double		loop_min_t_ms;
+			double		loop_max_t_ms;
+			double		loop_min_r;
+			double		loop_max_r;
+			double		loop_total_rows;
 
 			if (nloops <= 0)
 				continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
 			rows = instrument->ntuples / nloops;
+			loop_min_t_ms = 1000.0 * instrument->min_t;
+			loop_max_t_ms = 1000.0 * instrument->max_t;
+			loop_min_r = instrument->min_tuples;
+			loop_max_r = instrument->max_tuples;
+			loop_total_rows = instrument->ntuples;
 
 			ExplainOpenWorker(n, es);
 
@@ -1688,6 +1730,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 					appendStringInfo(es->str,
 									 "actual rows=%.0f loops=%.0f\n",
 									 rows, nloops);
+				if (nloops > 1)
+				{
+					appendStringInfo(es->str, "\n");
+					ExplainIndentText(es);
+					if (es->timing)
+						appendStringInfo(es->str,
+										 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+										 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+					else
+						appendStringInfo(es->str,
+										 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+										 loop_min_r, loop_max_r, loop_total_rows);
+				}
 			}
 			else
 			{
@@ -1697,8 +1752,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										 startup_ms, 3, es);
 					ExplainPropertyFloat("Actual Total Time", "ms",
 										 total_ms, 3, es);
+					if (nloops > 1)
+					{
+						ExplainPropertyFloat("Loop Min Time", "ms",
+											 loop_min_t_ms, 3, es);
+						ExplainPropertyFloat("Loop Max Time", "ms",
+											 loop_max_t_ms, 3, es);
+					}
 				}
 				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				if (nloops > 1)
+				{
+					ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+					ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+					ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+				}
 				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
 
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 2b106d8473c..6588ce2db6a 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -153,7 +153,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -186,6 +213,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 2f9905b7c8e..e2ff330b8f1 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -79,7 +79,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 1734dfee8cc..275cdb37672 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -354,8 +354,13 @@ select jsonb_pretty(
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
                              "Async Capable": false,        +
+                             "Loop Max Rows": 0,            +
+                             "Loop Max Time": 0.0,          +
+                             "Loop Min Rows": 0,            +
+                             "Loop Min Time": 0.0,          +
                              "Relation Name": "tenk1",      +
                              "Parallel Aware": true,        +
+                             "Loop Total Rows": 0,          +
                              "Local Hit Blocks": 0,         +
                              "Temp Read Blocks": 0,         +
                              "Actual Total Time": 0.0,      +
@@ -400,7 +405,12 @@ select jsonb_pretty(
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
                      "Async Capable": false,                +
+                     "Loop Max Rows": 0,                    +
+                     "Loop Max Time": 0.0,                  +
+                     "Loop Min Rows": 0,                    +
+                     "Loop Min Time": 0.0,                  +
                      "Parallel Aware": false,               +
+                     "Loop Total Rows": 0,                  +
                      "Sort Space Used": 0,                  +
                      "Local Hit Blocks": 0,                 +
                      "Temp Read Blocks": 0,                 +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 7555764c779..0e1242c089a 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2715,6 +2715,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop min_rows: 2  max_rows: 6  total_rows: 23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop min_rows: 2  max_rows: 2  total_rows: 10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop min_rows: 2  max_rows: 3  total_rows: 11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop min_rows: 0  max_rows: 1  total_rows: 3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop min_rows: 0  max_rows: 1  total_rows: 1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index d70bd8610cb..d5ba8d6f386 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -654,6 +654,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
#26Justin Pryzby
pryzby@telsasoft.com
In reply to: Noname (#23)
3 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi, and sorry to take such a long break from this patch.

On Wed, Apr 14, 2021 at 02:27:36PM +0300, e.sokolova@postgrespro.ru wrote:

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b62a76e7e5a..bf8c37baefd 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1615,6 +1615,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
double		total_ms = 1000.0 * planstate->instrument->total / nloops;
double		rows = planstate->instrument->ntuples / nloops;
+		double		loop_total_rows = planstate->instrument->ntuples;
+		double		loop_min_r = planstate->instrument->min_tuples;
+		double		loop_max_r = planstate->instrument->max_tuples;
+		double		loop_min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		loop_max_t_ms = 1000.0 * planstate->instrument->max_t;
if (es->format == EXPLAIN_FORMAT_TEXT)
{
@@ -1626,6 +1631,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
appendStringInfo(es->str,
" (actual rows=%.0f loops=%.0f)",
rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+                appendStringInfo(es->str, "\n");
+				ExplainIndentText(es);
+				if (es->timing)
+					appendStringInfo(es->str,
+									 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+									 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);

Now that I see it, I think it should say it with spaces, and not underscores,
like
| Loop Min Time: %.3f Max Time: %.3f ...

"Memory Usage:" already has spaces in its field names, so this is more
consistent, and isn't doing anything new.

I think the min/max/total should be first, and the timing should follow, if
enabled. The "if(timing)" doesn't even need to duplicate the output, it could
append just the timing part.

I refactored this all into a separate function. I don't see why we'd repeat
these.

+               double          loop_total_rows = planstate->instrument->ntuples;
+               double          loop_min_r = planstate->instrument->min_tuples;
+               double          loop_max_r = planstate->instrument->max_tuples;
+               double          loop_min_t_ms = 1000.0 * planstate->instrument->min_t;
+               double          loop_max_t_ms = 1000.0 * planstate->instrument->max_t;

I realize the duplication doesn't originate with your patch. But because of
the duplication, there can be inconsistencies; for example, you wrote "ms" in
one place and "s" in another. Maybe you copied from before
f90c708a048667befbf6bbe5f48ae9695cb89de4 (an issue I reported the first time I
was looking at this patch).

I think the non-text format timing stuff needs to be within "if (timing)".

I'm curious to hear what you and others think of the refactoring.

It'd be nice if there's a good way to add a test case for verbose output
involving parallel workers, but the output is unstable ...

--
Justin

Attachments:

0001-explain.c-refactor-ExplainNode.patchtext/x-diff; charset=us-asciiDownload
From e9dc405c55507b6c93a0e66434c5757254e95d54 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 15 Apr 2021 11:55:09 -0500
Subject: [PATCH 1/3] explain.c: refactor ExplainNode()

---
 src/backend/commands/explain.c | 110 ++++++++++++++-------------------
 1 file changed, 47 insertions(+), 63 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 891ad0e717..db99739cc5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -118,6 +118,8 @@ static void show_instrumentation_count(const char *qlabel, int which,
 									   PlanState *planstate, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
+static void show_loop_info(Instrumentation *instrument, bool isworker,
+		ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
 							  bool planning);
@@ -1609,36 +1611,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 	if (es->analyze &&
 		planstate->instrument && planstate->instrument->nloops > 0)
-	{
-		double		nloops = planstate->instrument->nloops;
-		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
-		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
-		double		rows = planstate->instrument->ntuples / nloops;
-
-		if (es->format == EXPLAIN_FORMAT_TEXT)
-		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
-			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
-		}
-		else
-		{
-			if (es->timing)
-			{
-				ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
-									 3, es);
-			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-		}
-	}
+		show_loop_info(planstate->instrument, false, es);
 	else if (es->analyze)
 	{
 		if (es->format == EXPLAIN_FORMAT_TEXT)
@@ -1667,44 +1640,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		for (int n = 0; n < w->num_workers; n++)
 		{
 			Instrumentation *instrument = &w->instrument[n];
-			double		nloops = instrument->nloops;
-			double		startup_ms;
-			double		total_ms;
-			double		rows;
 
-			if (nloops <= 0)
+			if (instrument->nloops <= 0)
 				continue;
-			startup_ms = 1000.0 * instrument->startup / nloops;
-			total_ms = 1000.0 * instrument->total / nloops;
-			rows = instrument->ntuples / nloops;
 
 			ExplainOpenWorker(n, es);
-
+			show_loop_info(instrument, true, es);
 			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
-				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
-			}
-			else
-			{
-				if (es->timing)
-				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
-				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-			}
-
+				appendStringInfoChar(es->str, '\n');
 			ExplainCloseWorker(n, es);
 		}
 	}
@@ -4030,6 +3973,47 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
 }
 
+void
+show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
+{
+	double		nloops = instrument->nloops;
+	double		startup_ms = 1000.0 * instrument->startup / nloops;
+	double		total_ms = 1000.0 * instrument->total / nloops;
+	double		rows = instrument->ntuples / nloops;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		if (isworker)
+			ExplainIndentText(es);
+		else
+			appendStringInfo(es->str, " (");
+
+		if (es->timing)
+			appendStringInfo(es->str,
+							 "actual time=%.3f..%.3f rows=%.0f loops=%.0f",
+							 startup_ms, total_ms, rows, nloops);
+		else
+			appendStringInfo(es->str,
+							 "actual rows=%.0f loops=%.0f",
+							 rows, nloops);
+
+		if (!isworker)
+			appendStringInfoChar(es->str, ')');
+	}
+	else
+	{
+		if (es->timing)
+		{
+			ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
+								 3, es);
+			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
+								 3, es);
+		}
+		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+		ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+	}
+}
+
 /*
  * Explain the constituent plans of an Append, MergeAppend,
  * BitmapAnd, or BitmapOr node.
-- 
2.17.0

0002-Re-PATCH-Add-extra-statistics-to-explain-for-Nested-.patchtext/x-diff; charset=us-asciiDownload
From bd6f2fc1073a3e5d9c982ea681094856f67bf30a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 30 Sep 2021 01:22:02 -0500
Subject: [PATCH 2/3] Re: [PATCH] Add extra statistics to explain for Nested
 Loop

Aug 17 Ekaterina Sokol
---
 src/backend/commands/explain.c                | 37 +++++++++++
 src/backend/executor/instrument.c             | 31 +++++++++
 src/include/executor/instrument.h             |  6 ++
 src/test/regress/expected/explain.out         | 10 +++
 src/test/regress/expected/partition_prune.out | 66 +++++++++++++++++++
 src/test/regress/sql/partition_prune.sql      |  7 ++
 6 files changed, 157 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index db99739cc5..428a073099 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1618,6 +1618,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -3980,6 +3981,11 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 	double		startup_ms = 1000.0 * instrument->startup / nloops;
 	double		total_ms = 1000.0 * instrument->total / nloops;
 	double		rows = instrument->ntuples / nloops;
+	double		loop_total_rows = instrument->ntuples;
+	double		loop_min_r = instrument->min_tuples;
+	double		loop_max_r = instrument->max_tuples;
+	double		loop_min_t_ms = 1000.0 * instrument->min_t;
+	double		loop_max_t_ms = 1000.0 * instrument->max_t;
 
 	if (es->format == EXPLAIN_FORMAT_TEXT)
 	{
@@ -3999,6 +4005,20 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 
 		if (!isworker)
 			appendStringInfoChar(es->str, ')');
+
+		if (nloops > 1 && es->verbose)
+		{
+			appendStringInfo(es->str, "\n");
+			ExplainIndentText(es);
+			if (es->timing)
+				appendStringInfo(es->str,
+								 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+								 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+			else
+				appendStringInfo(es->str,
+								 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+								 loop_min_r, loop_max_r, loop_total_rows);
+		}
 	}
 	else
 	{
@@ -4009,7 +4029,24 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
 								 3, es);
 		}
+
+		if (nloops > 1 && es->verbose)
+		{
+			ExplainPropertyFloat("Loop Min Time", "s", loop_min_t_ms,
+								 3, es);
+			ExplainPropertyFloat("Loop Max Time", "s", loop_max_t_ms,
+								 3, es);
+		}
+
 		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+
+		if (nloops > 1 && es->verbose)
+		{
+			ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+			ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+			ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+		}
+
 		ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 	}
 }
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 2b106d8473..6588ce2db6 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -153,7 +153,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -186,6 +213,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 1f3aa733ec..d0af28b0b8 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -79,7 +79,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # tuples removed by scanqual or joinqual OR
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 1734dfee8c..275cdb3767 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -354,8 +354,13 @@ select jsonb_pretty(
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
                              "Async Capable": false,        +
+                             "Loop Max Rows": 0,            +
+                             "Loop Max Time": 0.0,          +
+                             "Loop Min Rows": 0,            +
+                             "Loop Min Time": 0.0,          +
                              "Relation Name": "tenk1",      +
                              "Parallel Aware": true,        +
+                             "Loop Total Rows": 0,          +
                              "Local Hit Blocks": 0,         +
                              "Temp Read Blocks": 0,         +
                              "Actual Total Time": 0.0,      +
@@ -400,7 +405,12 @@ select jsonb_pretty(
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
                      "Async Capable": false,                +
+                     "Loop Max Rows": 0,                    +
+                     "Loop Max Time": 0.0,                  +
+                     "Loop Min Rows": 0,                    +
+                     "Loop Min Time": 0.0,                  +
                      "Parallel Aware": false,               +
+                     "Loop Total Rows": 0,                  +
                      "Sort Space Used": 0,                  +
                      "Local Hit Blocks": 0,                 +
                      "Temp Read Blocks": 0,                 +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 7555764c77..0e1242c089 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2715,6 +2715,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop min_rows: 2  max_rows: 6  total_rows: 23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop min_rows: 2  max_rows: 2  total_rows: 10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop min_rows: 2  max_rows: 3  total_rows: 11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop min_rows: 0  max_rows: 1  total_rows: 3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop min_rows: 0  max_rows: 1  total_rows: 1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index d70bd8610c..d5ba8d6f38 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -654,6 +654,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
-- 
2.17.0

0003-Justin-s-changes.patchtext/x-diff; charset=us-asciiDownload
From b7c5c53a210ecf1895e70d206f1d67a9e4f13163 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 30 Sep 2021 01:58:11 -0500
Subject: [PATCH 3/3] Justin's changes

---
 src/backend/commands/explain.c                | 24 +++++++++----------
 src/test/regress/expected/partition_prune.out | 14 +++++------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 428a073099..756051864c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -4008,15 +4008,16 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 
 		if (nloops > 1 && es->verbose)
 		{
-			appendStringInfo(es->str, "\n");
+			appendStringInfoChar(es->str, '\n');
 			ExplainIndentText(es);
+
 			if (es->timing)
 				appendStringInfo(es->str,
-								 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
-								 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f  Min Time: %.3f  Max Time: %.3f",
+								 loop_min_r, loop_max_r, loop_total_rows, loop_min_t_ms, loop_max_t_ms);
 			else
 				appendStringInfo(es->str,
-								 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f",
 								 loop_min_r, loop_max_r, loop_total_rows);
 		}
 	}
@@ -4028,14 +4029,13 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 								 3, es);
 			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
 								 3, es);
-		}
-
-		if (nloops > 1 && es->verbose)
-		{
-			ExplainPropertyFloat("Loop Min Time", "s", loop_min_t_ms,
-								 3, es);
-			ExplainPropertyFloat("Loop Max Time", "s", loop_max_t_ms,
-								 3, es);
+			if (nloops > 1 && es->verbose)
+			{
+				ExplainPropertyFloat("Loop Min Time", "ms", loop_min_t_ms,
+									 3, es);
+				ExplainPropertyFloat("Loop Max Time", "ms", loop_max_t_ms,
+									 3, es);
+			}
 		}
 
 		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 0e1242c089..d9d77090dc 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2725,17 +2725,17 @@ select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
    ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
          Output: tbl1.col1
    ->  Append (actual rows=5 loops=5)
-         Loop min_rows: 2  max_rows: 6  total_rows: 23
+         Loop Min Rows: 2  Max Rows: 6  Total Rows: 23
          ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
-               Loop min_rows: 2  max_rows: 2  total_rows: 10
+               Loop Min Rows: 2  Max Rows: 2  Total Rows: 10
                Output: tprt_1.col1
                Index Cond: (tprt_1.col1 < tbl1.col1)
          ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
-               Loop min_rows: 2  max_rows: 3  total_rows: 11
+               Loop Min Rows: 2  Max Rows: 3  Total Rows: 11
                Output: tprt_2.col1
                Index Cond: (tprt_2.col1 < tbl1.col1)
          ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
-               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
                Output: tprt_3.col1
                Index Cond: (tprt_3.col1 < tbl1.col1)
          ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
@@ -2758,16 +2758,16 @@ select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
    ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
          Output: tbl1.col1
    ->  Append (actual rows=1 loops=5)
-         Loop min_rows: 0  max_rows: 1  total_rows: 3
+         Loop Min Rows: 0  Max Rows: 1  Total Rows: 3
          ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
                Output: tprt_1.col1
                Index Cond: (tprt_1.col1 = tbl1.col1)
          ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
-               Loop min_rows: 1  max_rows: 1  total_rows: 2
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
                Output: tprt_2.col1
                Index Cond: (tprt_2.col1 = tbl1.col1)
          ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
-               Loop min_rows: 0  max_rows: 1  total_rows: 1
+               Loop Min Rows: 0  Max Rows: 1  Total Rows: 1
                Output: tprt_3.col1
                Index Cond: (tprt_3.col1 = tbl1.col1)
          ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
-- 
2.17.0

#27Lukas Fittl
lukas@fittl.com
In reply to: Justin Pryzby (#26)
Re: [PATCH] Add extra statistics to explain for Nested Loop

On Sun, Nov 21, 2021 at 8:55 PM Justin Pryzby <pryzby@telsasoft.com> wrote:

I'm curious to hear what you and others think of the refactoring.

It'd be nice if there's a good way to add a test case for verbose output
involving parallel workers, but the output is unstable ...

I've reviewed this patch, and it works as expected - the refactoring
changes by Justin also appear to make sense to me.

I've briefly thought whether this needs documentation (currently the patch
includes none), but there does not appear to be a good place to add
documentation about this from a quick glance, so it seems acceptable to
leave this out given the lack of more detailed EXPLAIN documentation in
general.

The one item that still feels a bit open to me is benchmarking, based on
Andres' comment a while ago:

On Mon, Oct 19, 2020 at 4:20 PM Andres Freund <andres@anarazel.de> wrote:

I'm a bit worried that further increasing the size of struct
Instrumentation will increase the overhead of EXPLAIN ANALYZE further -
in some workloads we spend a fair bit of time in code handling that. It
would be good to try to find a few bad cases, and see what the overhead is.

Whilst no specific bad cases were provided, I wonder if even a simple
pgbench with auto_explain (and log_analyze=1) would be a way to test this?

The overhead of the Instrumentation struct size should show regardless of
whether a plan actually includes a Nested Loop.

Thanks,
Lukas

--
Lukas Fittl

#28Ekaterina Sokolova
e.sokolova@postgrespro.ru
In reply to: Justin Pryzby (#26)
2 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi, hackers.

I attach the new version of the patch.

Justin Pryzby <pryzby@telsasoft.com> wrote:

I'm curious to hear what you and others think of the refactoring.

Thank you so much. With your changes, the patch has become more
understandable and readable.

It'd be nice if there's a good way to add a test case for verbose
output
involving parallel workers, but the output is unstable ...

Done!

Lukas Fittl <lukas@fittl.com> wrote:

I've briefly thought whether this needs documentation (currently the
patch includes none),
but there does not appear to be a good place to add documentation about
this from a
quick glance, so it seems acceptable to leave this out given the lack
of more detailed
EXPLAIN documentation in general.

You're right! I added a feature description to the patch header.

Whilst no specific bad cases were provided, I wonder if even a simple
pgbench with
auto_explain (and log_analyze=1) would be a way to test this?

I wanted to measure the overhead, but couldn't choose the correct way. Thanks
for the idea with auto_explain.
I loaded it and made 10 requests with pgbench (number of clients: 1, number of
threads: 1).
I'm not sure I chose the right way to measure overhead, so any
suggestions are welcome.
Current results are in file overhead_v0.txt.

Please feel free to share your suggestions and comments. Regards,

--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

extra_statistics_v6.patchtext/x-diff; name=extra_statistics_v6.patchDownload
From: Ekaterina Sokolova <e.sokolova@postgrespro.ru>
Subject: [PATCH] Add extra statistics to explain for Nested Loop

For some distributions of data in tables, different loops in nested loop
joins can take different amounts of time and process different numbers of
entries. This makes the average statistics returned by EXPLAIN ANALYZE not
very useful for a DBA.

This patch adds collection of min, max and total statistics for time and rows
across all loops to EXPLAIN ANALYSE. You need to set the VERBOSE flag to display
this information. The patch contains regression tests.

Example of results in TEXT format:
   ->  Append (actual rows=5 loops=5)
         Loop Min Rows: 2  Max Rows: 6  Total Rows: 23

Reviewed-by: Lukas Fittl, Justin Pryzby, Yugo Nagata, Julien Rouhaud.

---
 src/backend/commands/explain.c                | 147 +++++++++++++++-----------
 src/backend/executor/instrument.c             |  31 ++++++
 src/include/executor/instrument.h             |   6 ++
 src/test/regress/expected/explain.out         |  10 ++
 src/test/regress/expected/partition_prune.out | 117 ++++++++++++++++++++
 src/test/regress/sql/partition_prune.sql      |  32 ++++++
 6 files changed, 280 insertions(+), 63 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 10644dfac44..458f37c3fc6 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -118,6 +118,8 @@ static void show_instrumentation_count(const char *qlabel, int which,
 									   PlanState *planstate, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
+static void show_loop_info(Instrumentation *instrument, bool isworker,
+						   ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
 							  bool planning);
@@ -1606,42 +1608,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 	if (es->analyze &&
 		planstate->instrument && planstate->instrument->nloops > 0)
-	{
-		double		nloops = planstate->instrument->nloops;
-		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
-		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
-		double		rows = planstate->instrument->ntuples / nloops;
-
-		if (es->format == EXPLAIN_FORMAT_TEXT)
-		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
-			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
-		}
-		else
-		{
-			if (es->timing)
-			{
-				ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
-									 3, es);
-			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-		}
-	}
+		show_loop_info(planstate->instrument, false, es);
 	else if (es->analyze)
 	{
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1664,44 +1638,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		for (int n = 0; n < w->num_workers; n++)
 		{
 			Instrumentation *instrument = &w->instrument[n];
-			double		nloops = instrument->nloops;
-			double		startup_ms;
-			double		total_ms;
-			double		rows;
 
-			if (nloops <= 0)
+			if (instrument->nloops <= 0)
 				continue;
-			startup_ms = 1000.0 * instrument->startup / nloops;
-			total_ms = 1000.0 * instrument->total / nloops;
-			rows = instrument->ntuples / nloops;
 
 			ExplainOpenWorker(n, es);
-
+			show_loop_info(instrument, true, es);
 			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
-				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
-			}
-			else
-			{
-				if (es->timing)
-				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
-				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-			}
-
+				appendStringInfoChar(es->str, '\n');
 			ExplainCloseWorker(n, es);
 		}
 	}
@@ -3995,6 +3939,83 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
 }
 
+void
+show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
+{
+	double		nloops = instrument->nloops;
+	double		startup_ms = 1000.0 * instrument->startup / nloops;
+	double		total_ms = 1000.0 * instrument->total / nloops;
+	double		rows = instrument->ntuples / nloops;
+	double		loop_total_rows = instrument->ntuples;
+	double		loop_min_r = instrument->min_tuples;
+	double		loop_max_r = instrument->max_tuples;
+	double		loop_min_t_ms = 1000.0 * instrument->min_t;
+	double		loop_max_t_ms = 1000.0 * instrument->max_t;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		if (isworker)
+			ExplainIndentText(es);
+		else
+			appendStringInfo(es->str, " (");
+
+		if (es->timing)
+			appendStringInfo(es->str,
+							 "actual time=%.3f..%.3f rows=%.0f loops=%.0f",
+							 startup_ms, total_ms, rows, nloops);
+		else
+			appendStringInfo(es->str,
+							 "actual rows=%.0f loops=%.0f",
+							 rows, nloops);
+
+		if (!isworker)
+			appendStringInfoChar(es->str, ')');
+
+		if (nloops > 1 && es->verbose)
+		{
+			appendStringInfoChar(es->str, '\n');
+			ExplainIndentText(es);
+
+			if (es->timing)
+				appendStringInfo(es->str,
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f  Min Time: %.3f  Max Time: %.3f",
+								 loop_min_r, loop_max_r, loop_total_rows, loop_min_t_ms, loop_max_t_ms);
+			else
+				appendStringInfo(es->str,
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f",
+								 loop_min_r, loop_max_r, loop_total_rows);
+		}
+	}
+	else
+	{
+		if (es->timing)
+		{
+			ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
+								 3, es);
+			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
+								 3, es);
+			if (nloops > 1 && es->verbose)
+			{
+				ExplainPropertyFloat("Loop Min Time", "ms", loop_min_t_ms,
+									 3, es);
+				ExplainPropertyFloat("Loop Max Time", "ms", loop_max_t_ms,
+									 3, es);
+			}
+		}
+
+		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+
+		if (nloops > 1 && es->verbose)
+		{
+			ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+			ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+			ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+		}
+
+		ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+	}
+}
+
 /*
  * Explain the constituent plans of an Append, MergeAppend,
  * BitmapAnd, or BitmapOr node.
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 2b106d8473c..a49308ed126 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -153,7 +153,34 @@ InstrEndLoop(Instrumentation *instr)
 
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
+
+	/*
+	 * this is first loop
+	 *
+	 * We only initialize the min values. We don't need to bother with the
+	 * max, because those are 0 and the non-zero values will get updated a
+	 * couple lines later.
+	 */
+	if (instr->nloops == 0)
+	{
+		instr->min_t = totaltime;
+		instr->min_tuples = instr->tuplecount;
+	}
+
+	if (instr->min_t > totaltime)
+		instr->min_t = totaltime;
+
+	if (instr->max_t < totaltime)
+		instr->max_t = totaltime;
+
 	instr->ntuples += instr->tuplecount;
+
+	if (instr->min_tuples > instr->tuplecount)
+		instr->min_tuples = instr->tuplecount;
+
+	if (instr->max_tuples < instr->tuplecount)
+		instr->max_tuples = instr->tuplecount;
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -186,6 +213,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 2f9905b7c8e..e2ff330b8f1 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -79,7 +79,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* min counter of produced tuples for all
+								 * loops */
+	double		max_tuples;		/* max counter of produced tuples for all
+								 * loops */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 1734dfee8cc..275cdb37672 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -354,8 +354,13 @@ select jsonb_pretty(
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
                              "Async Capable": false,        +
+                             "Loop Max Rows": 0,            +
+                             "Loop Max Time": 0.0,          +
+                             "Loop Min Rows": 0,            +
+                             "Loop Min Time": 0.0,          +
                              "Relation Name": "tenk1",      +
                              "Parallel Aware": true,        +
+                             "Loop Total Rows": 0,          +
                              "Local Hit Blocks": 0,         +
                              "Temp Read Blocks": 0,         +
                              "Actual Total Time": 0.0,      +
@@ -400,7 +405,12 @@ select jsonb_pretty(
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
                      "Async Capable": false,                +
+                     "Loop Max Rows": 0,                    +
+                     "Loop Max Time": 0.0,                  +
+                     "Loop Min Rows": 0,                    +
+                     "Loop Min Time": 0.0,                  +
                      "Parallel Aware": false,               +
+                     "Loop Total Rows": 0,                  +
                      "Sort Space Used": 0,                  +
                      "Local Hit Blocks": 0,                 +
                      "Temp Read Blocks": 0,                 +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 7555764c779..18617b9e206 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1962,6 +1962,24 @@ begin
     end loop;
 end;
 $$;
+create function explain_verbose_parallel_append(text) returns setof text
+language plpgsql as
+$$
+declare
+    ln text;
+begin
+    for ln in
+        execute format('explain (analyze, verbose, costs off, summary off, timing off) %s',
+            $1)
+    loop
+        ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
+        ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop Min Rows: \d+  Max Rows: \d+  Total Rows: \d+',
+                                 'Loop Min Rows: N  Max Rows: N  Total Rows: N');
+        return next ln;
+    end loop;
+end;
+$$;
 prepare ab_q4 (int, int) as
 select avg(a) from ab where a between $1 and $2 and b < 4;
 -- Encourage use of parallel plans
@@ -2218,6 +2236,39 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
                                  Index Cond: (a = a.a)
 (28 rows)
 
+-- Tests for extra statistics
+create table lprt_b (b int not null);
+insert into lprt_b select generate_series(1,20);
+select explain_verbose_parallel_append('select * from lprt_a join lprt_b on a != b');
+                       explain_verbose_parallel_append                        
+------------------------------------------------------------------------------
+ Nested Loop (actual rows=N loops=N)
+   Output: lprt_a.a, lprt_b.b
+   Join Filter: (lprt_a.a <> lprt_b.b)
+   Rows Removed by Join Filter: 4
+   ->  Gather (actual rows=N loops=N)
+         Output: lprt_b.b
+         Workers Planned: 2
+         Workers Launched: N
+         ->  Parallel Seq Scan on public.lprt_b (actual rows=N loops=N)
+               Loop Min Rows: N  Max Rows: N  Total Rows: N
+               Output: lprt_b.b
+               Worker 0:  actual rows=N loops=N
+               Worker 1:  actual rows=N loops=N
+   ->  Materialize (actual rows=N loops=N)
+         Loop Min Rows: N  Max Rows: N  Total Rows: N
+         Output: lprt_a.a
+         ->  Gather (actual rows=N loops=N)
+               Output: lprt_a.a
+               Workers Planned: 1
+               Workers Launched: N
+               ->  Parallel Seq Scan on public.lprt_a (actual rows=N loops=N)
+                     Loop Min Rows: N  Max Rows: N  Total Rows: N
+                     Output: lprt_a.a
+                     Worker 0:  actual rows=N loops=N
+(24 rows)
+
+drop table lprt_b;
 delete from lprt_a where a = 1;
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
                                      explain_parallel_append                                     
@@ -2715,6 +2766,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop Min Rows: 2  Max Rows: 6  Total Rows: 23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop Min Rows: 2  Max Rows: 2  Total Rows: 10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop Min Rows: 2  Max Rows: 3  Total Rows: 11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop Min Rows: 0  Max Rows: 1  Total Rows: 3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop Min Rows: 0  Max Rows: 1  Total Rows: 1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index d70bd8610cb..68110e20eeb 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -469,6 +469,25 @@ begin
 end;
 $$;
 
+create function explain_verbose_parallel_append(text) returns setof text
+language plpgsql as
+$$
+declare
+    ln text;
+begin
+    for ln in
+        execute format('explain (analyze, verbose, costs off, summary off, timing off) %s',
+            $1)
+    loop
+        ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
+        ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop Min Rows: \d+  Max Rows: \d+  Total Rows: \d+',
+                                 'Loop Min Rows: N  Max Rows: N  Total Rows: N');
+        return next ln;
+    end loop;
+end;
+$$;
+
 prepare ab_q4 (int, int) as
 select avg(a) from ab where a between $1 and $2 and b < 4;
 
@@ -528,6 +547,12 @@ insert into lprt_a values(3),(3);
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 3)');
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
 
+-- Tests for extra statistics
+create table lprt_b (b int not null);
+insert into lprt_b select generate_series(1,20);
+select explain_verbose_parallel_append('select * from lprt_a join lprt_b on a != b');
+drop table lprt_b;
+
 delete from lprt_a where a = 1;
 
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
@@ -654,6 +679,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
overhead_v0.txttext/plain; name=overhead_v0.txtDownload
#29Julien Rouhaud
rjuju123@gmail.com
In reply to: Ekaterina Sokolova (#28)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi,

On Thu, Feb 03, 2022 at 12:59:03AM +0300, Ekaterina Sokolova wrote:

I attach the new version of the patch.

I wanted to measure the overhead, but couldn't choose the correct way. Thanks for
the idea with auto_explain.
I loaded it and made 10 requests of pgbench (number of clients: 1, of
threads: 1).
I'm not sure I chose the right way to measure overhead, so any suggestions
are welcome.
Current results are in file overhead_v0.txt.

Please feel free to share your suggestions and comments. Regards,

| master latency (ms) | master tps | | new latency (ms) | new tps
--------------------------------------------------------------------------
1 | 2,462 | 406,190341 | | 4,485 | 222,950527
2 | 3,877 | 257,89813 | | 4,141 | 241,493395
3 | 3,789 | 263,935811 | | 2,837 | 352,522297
4 | 3,53 | 283,310196 | | 5,510 | 181,488203
5 | 3,413 | 292,997363 | | 6,475 | 154,432999
6 | 3,757 | 266,148564 | | 4,073 | 245,507218
7 | 3,752 | 266,560043 | | 3,901 | 256,331385
8 | 4,389 | 227,847524 | | 4,658 | 214,675196
9 | 4,341 | 230,372282 | | 4,220 | 236,983672
10 | 3,893 | 256,891104 | | 7.059 | 141,667139
--------------------------------------------------------------------------
avg| 3,7203 | 275,215136 | | 4,03 | 224,8052031

master/new latency | 0,92315 |
master/new tps | 1,22424 |

new/master latency | 1,08325 |
new/master tps | 0,81683 |

The overhead is quite significant (at least for OLTP-style workload).

I think this should be done with a new InstrumentOption, like
INSTRUMENT_LOOP_DETAILS or something like that, and set it where appropriate.
Otherwise you will have to pay that overhead even if you won't use the new
fields at all. It could be EXPLAIN (ANALYZE, VERBOSE OFF), but also simply
using pg_stat_statements which doesn't seem acceptable.

One problem is that some extensions (like pg_stat_statements) can rely on
INSTRUMENT_ALL but may or may not care about those extra counters. Maybe we
should remove that alias and instead provide two (like INSTRUMENT_ALL_VERBOSE
and INSTRUMENT_ALL_SOMETHINGELSE, I don't have any bright name right now) so
that authors can decide what they need instead of silently having such
extension ruin the performance for no reason.

About the implementation itself:

+static void show_loop_info(Instrumentation *instrument, bool isworker,
+                          ExplainState *es);

I think this should be done as a separate refactoring commit.

+   /*
+    * this is first loop
+    *
+    * We only initialize the min values. We don't need to bother with the
+    * max, because those are 0 and the non-zero values will get updated a
+    * couple lines later.
+    */
+   if (instr->nloops == 0)
+   {
+       instr->min_t = totaltime;
+       instr->min_tuples = instr->tuplecount;
+   }
+
+   if (instr->min_t > totaltime)
+       instr->min_t = totaltime;
+
+   if (instr->max_t < totaltime)
+       instr->max_t = totaltime;
+
    instr->ntuples += instr->tuplecount;
+
+   if (instr->min_tuples > instr->tuplecount)
+       instr->min_tuples = instr->tuplecount;
+
+   if (instr->max_tuples < instr->tuplecount)
+       instr->max_tuples = instr->tuplecount;
+
    instr->nloops += 1;

Why do you need to initialize min_t and min_tuples but not max_t and
max_tuples while both will initially be 0 and possibly updated afterwards?

I think you should either entirely remove this if (instr->nloops == 0) part, or
handle some else block.

#30Greg Stark
stark@mit.edu
In reply to: Julien Rouhaud (#29)
Re: [PATCH] Add extra statistics to explain for Nested Loop

This patch got some very positive feedback and some significant amount
of work earlier in the release cycle. The feedback from Julien earlier
this month seemed pretty minor.

Ekaterina, is there any chance you'll be able to work on this this
week and do you think it has a chance of making this release? Julien,
do you think it's likely to be possible to polish for this release?

Otherwise I guess we should move it to the next CF but it seems a
shame given how much work has been done and how close it is.

On Mon, 7 Mar 2022 at 00:17, Julien Rouhaud <rjuju123@gmail.com> wrote:

Hi,

On Thu, Feb 03, 2022 at 12:59:03AM +0300, Ekaterina Sokolova wrote:

I attach the new version of the patch.

I wanted to measure the overhead, but couldn't choose the correct way. Thanks for
the idea with auto_explain.
I loaded it and made 10 requests of pgbench (number of clients: 1, of
threads: 1).
I'm not sure I chose the right way to measure overhead, so any suggestions
are welcome.
Current results are in file overhead_v0.txt.

Please feel free to share your suggestions and comments. Regards,

| master latency (ms) | master tps | | new latency (ms) | new tps
--------------------------------------------------------------------------
1 | 2,462 | 406,190341 | | 4,485 | 222,950527
2 | 3,877 | 257,89813 | | 4,141 | 241,493395
3 | 3,789 | 263,935811 | | 2,837 | 352,522297
4 | 3,53 | 283,310196 | | 5,510 | 181,488203
5 | 3,413 | 292,997363 | | 6,475 | 154,432999
6 | 3,757 | 266,148564 | | 4,073 | 245,507218
7 | 3,752 | 266,560043 | | 3,901 | 256,331385
8 | 4,389 | 227,847524 | | 4,658 | 214,675196
9 | 4,341 | 230,372282 | | 4,220 | 236,983672
10 | 3,893 | 256,891104 | | 7.059 | 141,667139
--------------------------------------------------------------------------
avg| 3,7203 | 275,215136 | | 4,03 | 224,8052031

master/new latency | 0,92315 |
master/new tps | 1,22424 |

new/master latency | 1,08325 |
new/master tps | 0,81683 |

The overhead is quite significant (at least for OLTP-style workload).

I think this should be done with a new InstrumentOption, like
INSTRUMENT_LOOP_DETAILS or something like that, and set it where appropriate.
Otherwise you will have to pay that overhead even if you won't use the new
fields at all. It could be EXPLAIN (ANALYZE, VERBOSE OFF), but also simply
using pg_stat_statements which doesn't seem acceptable.

One problem is that some extensions (like pg_stat_statements) can rely on
INSTRUMENT_ALL but may or may not care about those extra counters. Maybe we
should remove that alias and instead provide two (like INSTRUMENT_ALL_VERBOSE
and INSTRUMENT_ALL_SOMETHINGELSE, I don't have any bright name right now) so
that authors can decide what they need instead of silently having such
extension ruin the performance for no reason.

About the implementation itself:

+static void show_loop_info(Instrumentation *instrument, bool isworker,
+                          ExplainState *es);

I think this should be done as a separate refactoring commit.

+   /*
+    * this is first loop
+    *
+    * We only initialize the min values. We don't need to bother with the
+    * max, because those are 0 and the non-zero values will get updated a
+    * couple lines later.
+    */
+   if (instr->nloops == 0)
+   {
+       instr->min_t = totaltime;
+       instr->min_tuples = instr->tuplecount;
+   }
+
+   if (instr->min_t > totaltime)
+       instr->min_t = totaltime;
+
+   if (instr->max_t < totaltime)
+       instr->max_t = totaltime;
+
instr->ntuples += instr->tuplecount;
+
+   if (instr->min_tuples > instr->tuplecount)
+       instr->min_tuples = instr->tuplecount;
+
+   if (instr->max_tuples < instr->tuplecount)
+       instr->max_tuples = instr->tuplecount;
+
instr->nloops += 1;

Why do you need to initialize min_t and min_tuples but not max_t and
max_tuples while both will initially be 0 and possibly updated afterwards?

I think you should either entirely remove this if (instr->nloops == 0) part, or
handle some else block.

--
greg

#31Justin Pryzby
pryzby@telsasoft.com
In reply to: Greg Stark (#30)
Re: [PATCH] Add extra statistics to explain for Nested Loop
+static void show_loop_info(Instrumentation *instrument, bool isworker,
+                          ExplainState *es);

I think this should be done as a separate refactoring commit.

Right - the 0001 patch I sent seems independently beneficial, and makes the
changes in 0002 more apparent. My 0001 could also be applied after the feature
freeze and before branching for v16.

#32Julien Rouhaud
rjuju123@gmail.com
In reply to: Greg Stark (#30)
Re: [PATCH] Add extra statistics to explain for Nested Loop

Hi,

On Mon, Mar 28, 2022 at 03:09:12PM -0400, Greg Stark wrote:

This patch got some very positive feedback and some significant amount
of work earlier in the release cycle. The feedback from Julien earlier
this month seemed pretty minor.

Ekaterina, is there any chance you'll be able to work on this this
week and do you think it has a chance of making this release? Julien,
do you think it's likely to be possible to polish for this release?

Most of the comments I have are easy to fix. But I think that the real problem
is the significant overhead shown by Ekaterina that for now would apply even if
you don't consume the new stats, for instance if you have pg_stat_statements.
And I'm still not sure of what is the best way to avoid that.

#33Justin Pryzby
pryzby@telsasoft.com
In reply to: Julien Rouhaud (#32)
2 attachment(s)
Re: [PATCH] Add extra statistics to explain for Nested Loop

This message lost track of the email headers, so CFBOT isn't processing the new
patches; I'm attempting to remedy that now.
/messages/by-id/ae576cac3f451d318374f2a2e494aab1@postgrespro.ru

Show quoted text

On Fri, Apr 01, 2022 at 11:46:47PM +0300, Ekaterina Sokolova wrote:

Hi, hackers. Thank you for your attention to this topic.

Julien Rouhaud wrote:

+static void show_loop_info(Instrumentation *instrument, bool isworker,
+                           ExplainState *es);

I think this should be done as a separate refactoring commit.

Sure. I divided the patch. Now Justin's refactoring commit is separate. I also
updated it a bit.

Most of the comments I have are easy to fix. But I think that the real
problem
is the significant overhead shown by Ekaterina that for now would apply
even if
you don't consume the new stats, for instance if you have
pg_stat_statements.
And I'm still not sure of what is the best way to avoid that.

I took your advice about InstrumentOption. Now INSTRUMENT_EXTRA exists.
So currently there is no overhead during basic load. Operations using
INSTRUMENT_ALL still incur overhead (because INSTRUMENT_EXTRA is a part of
INSTRUMENT_ALL), but it is much less significant than before. I attach new
overhead statistics collected by pgbench with auto_explain enabled.

Why do you need to initialize min_t and min_tuples but not max_t and
max_tuples while both will initially be 0 and possibly updated
afterwards?

We need this initialization for the min values; the comment explaining it is
located above the block of code that performs the initialization.

I am convinced that the latest changes have affected the patch in a positive
way. I'll be pleased to hear your thoughts on this.

Attachments:

0001-explain.c-refactor-ExplainNode.patchtext/x-diff; charset=us-asciiDownload
From 0dec500a0ed934d5d2038cb087ba6a605cafcdef Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 15 Apr 2021 11:55:09 -0500
Subject: [PATCH 1/2] explain.c: refactor ExplainNode()

---
 src/backend/commands/explain.c | 110 ++++++++++++++-------------------
 1 file changed, 47 insertions(+), 63 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index cb13227db1f..06e089a1220 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -118,6 +118,8 @@ static void show_instrumentation_count(const char *qlabel, int which,
 									   PlanState *planstate, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
+static void show_loop_info(Instrumentation *instrument, bool isworker,
+		ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
 							  bool planning);
@@ -1615,36 +1617,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 	if (es->analyze &&
 		planstate->instrument && planstate->instrument->nloops > 0)
-	{
-		double		nloops = planstate->instrument->nloops;
-		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
-		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
-		double		rows = planstate->instrument->ntuples / nloops;
-
-		if (es->format == EXPLAIN_FORMAT_TEXT)
-		{
-			if (es->timing)
-				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
-			else
-				appendStringInfo(es->str,
-								 " (actual rows=%.0f loops=%.0f)",
-								 rows, nloops);
-		}
-		else
-		{
-			if (es->timing)
-			{
-				ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
-									 3, es);
-				ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
-									 3, es);
-			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-		}
-	}
+		show_loop_info(planstate->instrument, false, es);
 	else if (es->analyze)
 	{
 		if (es->format == EXPLAIN_FORMAT_TEXT)
@@ -1673,44 +1646,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		for (int n = 0; n < w->num_workers; n++)
 		{
 			Instrumentation *instrument = &w->instrument[n];
-			double		nloops = instrument->nloops;
-			double		startup_ms;
-			double		total_ms;
-			double		rows;
 
-			if (nloops <= 0)
+			if (instrument->nloops <= 0)
 				continue;
-			startup_ms = 1000.0 * instrument->startup / nloops;
-			total_ms = 1000.0 * instrument->total / nloops;
-			rows = instrument->ntuples / nloops;
 
 			ExplainOpenWorker(n, es);
-
+			show_loop_info(instrument, true, es);
 			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				ExplainIndentText(es);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
-				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
-			}
-			else
-			{
-				if (es->timing)
-				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
-				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-			}
-
+				appendStringInfoChar(es->str, '\n');
 			ExplainCloseWorker(n, es);
 		}
 	}
@@ -4039,6 +3982,47 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
 }
 
+void
+show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
+{
+	double		nloops = instrument->nloops;
+	double		startup_ms = 1000.0 * instrument->startup / nloops;
+	double		total_ms = 1000.0 * instrument->total / nloops;
+	double		rows = instrument->ntuples / nloops;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		if (isworker)
+			ExplainIndentText(es);
+		else
+			appendStringInfo(es->str, " (");
+
+		if (es->timing)
+			appendStringInfo(es->str,
+							 "actual time=%.3f..%.3f rows=%.0f loops=%.0f",
+							 startup_ms, total_ms, rows, nloops);
+		else
+			appendStringInfo(es->str,
+							 "actual rows=%.0f loops=%.0f",
+							 rows, nloops);
+
+		if (!isworker)
+			appendStringInfoChar(es->str, ')');
+	}
+	else
+	{
+		if (es->timing)
+		{
+			ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
+								 3, es);
+			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
+								 3, es);
+		}
+		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+		ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+	}
+}
+
 /*
  * Explain the constituent plans of an Append, MergeAppend,
  * BitmapAnd, or BitmapOr node.
-- 
2.17.1

0002-Add-extra-statistics-to-explain-for-Nested-Loop.patchtext/x-diff; charset=us-asciiDownload
From e0d92f2c112002bbacaae7abb747ea618d7fbd93 Mon Sep 17 00:00:00 2001
From: Ekaterina Sokolova <e.sokolova@postgrespro.ru>
Date: Fri, 1 Apr 2022 21:36:21 -0500
Subject: [PATCH 2/2] Add extra statistics to explain for Nested Loop

For some distributions of data in tables, different loops in nested loop
joins can take different time and process different amounts of entries.
It makes average statistics returned by explain analyze not very useful for DBA.

This patch adds collection of min, max and total statistics for time and rows
across all loops to EXPLAIN ANALYZE. You need to set the VERBOSE flag to display
this information. The patch contains regression tests.

Example of results in TEXT format:
   ->  Append (actual rows=5 loops=5)
         Loop Min Rows: 2  Max Rows: 6  Total Rows: 23

Reviewed-by: Lukas Fittl, Justin Pryzby, Yugo Nagata, Julien Rouhaud.
---
 src/backend/commands/explain.c                |  42 ++++++-
 src/backend/executor/instrument.c             |  38 +++++-
 src/include/executor/instrument.h             |  10 ++
 src/test/regress/expected/explain.out         |  10 ++
 src/test/regress/expected/partition_prune.out | 117 ++++++++++++++++++
 src/test/regress/sql/partition_prune.sql      |  32 +++++
 6 files changed, 247 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 06e089a1220..df14803ff06 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -119,7 +119,7 @@ static void show_instrumentation_count(const char *qlabel, int which,
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
 static void show_loop_info(Instrumentation *instrument, bool isworker,
-		ExplainState *es);
+						   ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
 							  bool planning);
@@ -541,6 +541,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	if (es->wal)
 		instrument_option |= INSTRUMENT_WAL;
 
+	if (es->verbose)
+		instrument_option |= INSTRUMENT_EXTRA;
+
 	/*
 	 * We always collect timing for the entire statement, even when node-level
 	 * timing is off, so we don't look at es->timing here.  (We could skip
@@ -1624,6 +1627,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 				ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -3989,6 +3993,11 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 	double		startup_ms = 1000.0 * instrument->startup / nloops;
 	double		total_ms = 1000.0 * instrument->total / nloops;
 	double		rows = instrument->ntuples / nloops;
+	double		loop_total_rows = instrument->ntuples;
+	double		loop_min_r = instrument->min_tuples;
+	double		loop_max_r = instrument->max_tuples;
+	double		loop_min_t_ms = 1000.0 * instrument->min_t;
+	double		loop_max_t_ms = 1000.0 * instrument->max_t;
 
 	if (es->format == EXPLAIN_FORMAT_TEXT)
 	{
@@ -4008,6 +4017,21 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 
 		if (!isworker)
 			appendStringInfoChar(es->str, ')');
+
+		if (nloops > 1 && es->verbose)
+		{
+			appendStringInfoChar(es->str, '\n');
+			ExplainIndentText(es);
+
+			if (es->timing)
+				appendStringInfo(es->str,
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f  Min Time: %.3f  Max Time: %.3f",
+								 loop_min_r, loop_max_r, loop_total_rows, loop_min_t_ms, loop_max_t_ms);
+			else
+				appendStringInfo(es->str,
+								 "Loop Min Rows: %.0f  Max Rows: %.0f  Total Rows: %.0f",
+								 loop_min_r, loop_max_r, loop_total_rows);
+		}
 	}
 	else
 	{
@@ -4017,8 +4041,24 @@ show_loop_info(Instrumentation *instrument, bool isworker, ExplainState *es)
 								 3, es);
 			ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
 								 3, es);
+			if (nloops > 1 && es->verbose)
+			{
+				ExplainPropertyFloat("Loop Min Time", "ms", loop_min_t_ms,
+									 3, es);
+				ExplainPropertyFloat("Loop Max Time", "ms", loop_max_t_ms,
+									 3, es);
+			}
 		}
+
 		ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+
+		if (nloops > 1 && es->verbose)
+		{
+			ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+			ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+			ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+		}
+
 		ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 	}
 }
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index c5ff02a8424..ac46011e516 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -34,11 +34,12 @@ InstrAlloc(int n, int instrument_options, bool async_mode)
 
 	/* initialize all fields to zeroes, then modify as needed */
 	instr = palloc0(n * sizeof(Instrumentation));
-	if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL))
+	if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL | INSTRUMENT_EXTRA))
 	{
 		bool		need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0;
 		bool		need_wal = (instrument_options & INSTRUMENT_WAL) != 0;
 		bool		need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
+		bool		need_extra = (instrument_options & INSTRUMENT_EXTRA) != 0;
 		int			i;
 
 		for (i = 0; i < n; i++)
@@ -46,6 +47,7 @@ InstrAlloc(int n, int instrument_options, bool async_mode)
 			instr[i].need_bufusage = need_buffers;
 			instr[i].need_walusage = need_wal;
 			instr[i].need_timer = need_timer;
+			instr[i].need_extra = need_extra;
 			instr[i].async_mode = async_mode;
 		}
 	}
@@ -61,6 +63,7 @@ InstrInit(Instrumentation *instr, int instrument_options)
 	instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0;
 	instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0;
 	instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
+	instr->need_extra = (instrument_options & INSTRUMENT_EXTRA) != 0;
 }
 
 /* Entry to a plan node */
@@ -154,6 +157,35 @@ InstrEndLoop(Instrumentation *instr)
 	instr->startup += instr->firsttuple;
 	instr->total += totaltime;
 	instr->ntuples += instr->tuplecount;
+
+	/*
+	 * Track the per-loop minimum and maximum of run time and tuple count.
+	 *
+	 * On the first loop (nloops == 0) we only seed the min values.  The max
+	 * values need no seeding: they start at 0 and are raised by the
+	 * comparisons just below.
+	 */
+	if (instr->need_extra)
+	{
+		if (instr->nloops == 0)
+		{
+			instr->min_t = totaltime;
+			instr->min_tuples = instr->tuplecount;
+		}
+
+		if (instr->min_t > totaltime)
+			instr->min_t = totaltime;
+
+		if (instr->max_t < totaltime)
+			instr->max_t = totaltime;
+
+		if (instr->min_tuples > instr->tuplecount)
+			instr->min_tuples = instr->tuplecount;
+
+		if (instr->max_tuples < instr->tuplecount)
+			instr->max_tuples = instr->tuplecount;
+	}
+
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
@@ -186,6 +218,10 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add)
 	dst->nloops += add->nloops;
 	dst->nfiltered1 += add->nfiltered1;
 	dst->nfiltered2 += add->nfiltered2;
+	dst->min_t = Min(dst->min_t, add->min_t);
+	dst->max_t = Max(dst->max_t, add->max_t);
+	dst->min_tuples = Min(dst->min_tuples, add->min_tuples);
+	dst->max_tuples = Max(dst->max_tuples, add->max_tuples);
 
 	/* Add delta of buffer usage since entry to node's totals */
 	if (dst->need_bufusage)
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 1b7157bdd15..e6178e248dc 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -58,6 +58,8 @@ typedef enum InstrumentOption
 	INSTRUMENT_BUFFERS = 1 << 1,	/* needs buffer usage */
 	INSTRUMENT_ROWS = 1 << 2,	/* needs row count */
 	INSTRUMENT_WAL = 1 << 3,	/* needs WAL usage */
+	INSTRUMENT_EXTRA = 1 << 4,	/* needs counters for min,
+								 * max and total values */
 	INSTRUMENT_ALL = PG_INT32_MAX
 } InstrumentOption;
 
@@ -67,6 +69,8 @@ typedef struct Instrumentation
 	bool		need_timer;		/* true if we need timer data */
 	bool		need_bufusage;	/* true if we need buffer usage data */
 	bool		need_walusage;	/* true if we need WAL usage data */
+	bool		need_extra;		/* true if we need min, max and total
+								 * statistics for loops */
 	bool		async_mode;		/* true if node is in async mode */
 	/* Info about current plan cycle: */
 	bool		running;		/* true if we've completed first tuple */
@@ -79,7 +83,13 @@ typedef struct Instrumentation
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* total startup time (in seconds) */
 	double		total;			/* total time (in seconds) */
+	double		min_t;			/* time of fastest loop (in seconds) */
+	double		max_t;			/* time of slowest loop (in seconds) */
 	double		ntuples;		/* total tuples produced */
+	double		min_tuples;		/* fewest tuples produced by any single
+								 * loop */
+	double		max_tuples;		/* most tuples produced by any single
+								 * loop */
 	double		ntuples2;		/* secondary node-specific tuple counter */
 	double		nloops;			/* # of run cycles for this node */
 	double		nfiltered1;		/* # of tuples removed by scanqual or joinqual */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index bc361759219..c70d0e288da 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -357,8 +357,13 @@ select jsonb_pretty(
                              "Actual Loops": 0,             +
                              "Startup Cost": 0.0,           +
                              "Async Capable": false,        +
+                             "Loop Max Rows": 0,            +
+                             "Loop Max Time": 0.0,          +
+                             "Loop Min Rows": 0,            +
+                             "Loop Min Time": 0.0,          +
                              "Relation Name": "tenk1",      +
                              "Parallel Aware": true,        +
+                             "Loop Total Rows": 0,          +
                              "Local Hit Blocks": 0,         +
                              "Temp Read Blocks": 0,         +
                              "Actual Total Time": 0.0,      +
@@ -403,7 +408,12 @@ select jsonb_pretty(
                      "Actual Loops": 0,                     +
                      "Startup Cost": 0.0,                   +
                      "Async Capable": false,                +
+                     "Loop Max Rows": 0,                    +
+                     "Loop Max Time": 0.0,                  +
+                     "Loop Min Rows": 0,                    +
+                     "Loop Min Time": 0.0,                  +
                      "Parallel Aware": false,               +
+                     "Loop Total Rows": 0,                  +
                      "Sort Space Used": 0,                  +
                      "Local Hit Blocks": 0,                 +
                      "Temp Read Blocks": 0,                 +
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 7555764c779..18617b9e206 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1962,6 +1962,24 @@ begin
     end loop;
 end;
 $$;
+create function explain_verbose_parallel_append(text) returns setof text
+language plpgsql as
+$$
+declare
+    ln text;
+begin
+    for ln in
+        execute format('explain (analyze, verbose, costs off, summary off, timing off) %s',
+            $1)
+    loop
+        ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
+        ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop Min Rows: \d+  Max Rows: \d+  Total Rows: \d+',
+                                 'Loop Min Rows: N  Max Rows: N  Total Rows: N');
+        return next ln;
+    end loop;
+end;
+$$;
 prepare ab_q4 (int, int) as
 select avg(a) from ab where a between $1 and $2 and b < 4;
 -- Encourage use of parallel plans
@@ -2218,6 +2236,39 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on
                                  Index Cond: (a = a.a)
 (28 rows)
 
+-- Tests for extra statistics
+create table lprt_b (b int not null);
+insert into lprt_b select generate_series(1,20);
+select explain_verbose_parallel_append('select * from lprt_a join lprt_b on a != b');
+                       explain_verbose_parallel_append                        
+------------------------------------------------------------------------------
+ Nested Loop (actual rows=N loops=N)
+   Output: lprt_a.a, lprt_b.b
+   Join Filter: (lprt_a.a <> lprt_b.b)
+   Rows Removed by Join Filter: 4
+   ->  Gather (actual rows=N loops=N)
+         Output: lprt_b.b
+         Workers Planned: 2
+         Workers Launched: N
+         ->  Parallel Seq Scan on public.lprt_b (actual rows=N loops=N)
+               Loop Min Rows: N  Max Rows: N  Total Rows: N
+               Output: lprt_b.b
+               Worker 0:  actual rows=N loops=N
+               Worker 1:  actual rows=N loops=N
+   ->  Materialize (actual rows=N loops=N)
+         Loop Min Rows: N  Max Rows: N  Total Rows: N
+         Output: lprt_a.a
+         ->  Gather (actual rows=N loops=N)
+               Output: lprt_a.a
+               Workers Planned: 1
+               Workers Launched: N
+               ->  Parallel Seq Scan on public.lprt_a (actual rows=N loops=N)
+                     Loop Min Rows: N  Max Rows: N  Total Rows: N
+                     Output: lprt_a.a
+                     Worker 0:  actual rows=N loops=N
+(24 rows)
+
+drop table lprt_b;
 delete from lprt_a where a = 1;
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
                                      explain_parallel_append                                     
@@ -2715,6 +2766,72 @@ order by tbl1.col1, tprt.col1;
  1001 | 1001
 (3 rows)
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=23 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=5 loops=5)
+         Loop Min Rows: 2  Max Rows: 6  Total Rows: 23
+         ->  Index Scan using tprt1_idx on public.tprt_1 (actual rows=2 loops=5)
+               Loop Min Rows: 2  Max Rows: 2  Total Rows: 10
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 < tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=3 loops=4)
+               Loop Min Rows: 2  Max Rows: 3  Total Rows: 11
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 < tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=1 loops=2)
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 < tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 < tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 < tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 < tbl1.col1)
+(27 rows)
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop (actual rows=3 loops=1)
+   Output: tbl1.col1, tprt.col1
+   ->  Seq Scan on public.tbl1 (actual rows=5 loops=1)
+         Output: tbl1.col1
+   ->  Append (actual rows=1 loops=5)
+         Loop Min Rows: 0  Max Rows: 1  Total Rows: 3
+         ->  Index Scan using tprt1_idx on public.tprt_1 (never executed)
+               Output: tprt_1.col1
+               Index Cond: (tprt_1.col1 = tbl1.col1)
+         ->  Index Scan using tprt2_idx on public.tprt_2 (actual rows=1 loops=2)
+               Loop Min Rows: 1  Max Rows: 1  Total Rows: 2
+               Output: tprt_2.col1
+               Index Cond: (tprt_2.col1 = tbl1.col1)
+         ->  Index Scan using tprt3_idx on public.tprt_3 (actual rows=0 loops=3)
+               Loop Min Rows: 0  Max Rows: 1  Total Rows: 1
+               Output: tprt_3.col1
+               Index Cond: (tprt_3.col1 = tbl1.col1)
+         ->  Index Scan using tprt4_idx on public.tprt_4 (never executed)
+               Output: tprt_4.col1
+               Index Cond: (tprt_4.col1 = tbl1.col1)
+         ->  Index Scan using tprt5_idx on public.tprt_5 (never executed)
+               Output: tprt_5.col1
+               Index Cond: (tprt_5.col1 = tbl1.col1)
+         ->  Index Scan using tprt6_idx on public.tprt_6 (never executed)
+               Output: tprt_6.col1
+               Index Cond: (tprt_6.col1 = tbl1.col1)
+(26 rows)
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index d70bd8610cb..68110e20eeb 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -469,6 +469,25 @@ begin
 end;
 $$;
 
+create function explain_verbose_parallel_append(text) returns setof text
+language plpgsql as
+$$
+declare
+    ln text;
+begin
+    for ln in
+        execute format('explain (analyze, verbose, costs off, summary off, timing off) %s',
+            $1)
+    loop
+        ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
+        ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
+        ln := regexp_replace(ln, 'Loop Min Rows: \d+  Max Rows: \d+  Total Rows: \d+',
+                                 'Loop Min Rows: N  Max Rows: N  Total Rows: N');
+        return next ln;
+    end loop;
+end;
+$$;
+
 prepare ab_q4 (int, int) as
 select avg(a) from ab where a between $1 and $2 and b < 4;
 
@@ -528,6 +547,12 @@ insert into lprt_a values(3),(3);
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 3)');
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
 
+-- Tests for extra statistics
+create table lprt_b (b int not null);
+insert into lprt_b select generate_series(1,20);
+select explain_verbose_parallel_append('select * from lprt_a join lprt_b on a != b');
+drop table lprt_b;
+
 delete from lprt_a where a = 1;
 
 select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
@@ -654,6 +679,13 @@ select tbl1.col1, tprt.col1 from tbl1
 inner join tprt on tbl1.col1 = tprt.col1
 order by tbl1.col1, tprt.col1;
 
+-- Tests for extra statistics
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
+
+explain (analyze, verbose, costs off, summary off, timing off)
+select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
+
 -- Last partition
 delete from tbl1;
 insert into tbl1 values (4400);
-- 
2.17.1