From 02f3cc981ccdf5ca3f69d6f80172e63db247955e Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Tue, 31 Dec 2019 18:49:41 -0600
Subject: [PATCH v4 2/7] Show tuplehash bucket and memory stats in EXPLAIN

Discussion: https://www.postgresql.org/message-id/flat/20200103161925.GM12066@telsasoft.com
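
Track the number of buckets (initial and final) and the peak memory used
by buckets and by tuples in each TupleHashTable, and show them in
EXPLAIN ANALYZE; memory usage is shown only with VERBOSE.  This covers
hashed aggregation (including grouping sets), hashed SetOp, recursive
union, and hashed subplans.

A sketch of the new text-mode output, taken from the updated regression
tests below (bucket counts depend on the input data):

  explain (costs off, timing off, summary off, analyze)
  select from generate_series(1,5) intersect select from generate_series(1,3);

   HashSetOp Intersect (actual rows=1 loops=1)
     Buckets: 4 (originally 2)
     ->  Append (actual rows=8 loops=1)
     ...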
---
 src/backend/commands/explain.c                | 131 +++++++++++++++++++++++---
 src/backend/executor/execGrouping.c           |  25 +++++
 src/backend/executor/nodeAgg.c                |  11 +++
 src/backend/executor/nodeRecursiveunion.c     |   3 +
 src/backend/executor/nodeSetOp.c              |   1 +
 src/backend/executor/nodeSubplan.c            |   3 +
 src/include/executor/executor.h               |   1 +
 src/include/nodes/execnodes.h                 |  10 ++
 src/test/regress/expected/groupingsets.out    |  11 ++-
 src/test/regress/expected/select_parallel.out |   4 +-
 src/test/regress/expected/subselect.out       |   8 +-
 src/test/regress/expected/union.out           |   6 +-
 src/test/regress/sql/select_parallel.sql      |   2 +-
 13 files changed, 195 insertions(+), 21 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index d901dc4..e262108 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -18,6 +18,7 @@
 #include "commands/createas.h"
 #include "commands/defrem.h"
 #include "commands/prepare.h"
+#include "executor/nodeAgg.h"
 #include "executor/nodeHash.h"
 #include "foreign/fdwapi.h"
 #include "jit/jit.h"
@@ -86,12 +87,13 @@ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors,
 								   ExplainState *es);
 static void show_agg_keys(AggState *astate, List *ancestors,
 						  ExplainState *es);
-static void show_grouping_sets(PlanState *planstate, Agg *agg,
+static void show_grouping_sets(AggState *aggstate, Agg *agg,
 							   List *ancestors, ExplainState *es);
-static void show_grouping_set_keys(PlanState *planstate,
+static void show_grouping_set_keys(AggState *aggstate,
 								   Agg *aggnode, Sort *sortnode,
 								   List *context, bool useprefix,
-								   List *ancestors, ExplainState *es);
+								   List *ancestors, ExplainState *es,
+								   hash_instrumentation *inst);
 static void show_group_keys(GroupState *gstate, List *ancestors,
 							ExplainState *es);
 static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
@@ -104,6 +106,7 @@ static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
+static void show_tuplehash_info(hash_instrumentation *inst, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
 static void show_instrumentation_count(const char *qlabel, int which,
@@ -1489,6 +1492,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 					appendStringInfo(es->str, " %s", setopcmd);
 				else
 					ExplainPropertyText("Command", setopcmd, es);
+				/* XXX: should we show the strategy in text mode, too? */
 			}
 			break;
 		default:
@@ -1886,6 +1890,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
 			break;
+		case T_SetOp:
+			{
+				SetOpState *sos = castNode(SetOpState, planstate);
+
+				if (sos->hashtable)
+					show_tuplehash_info(&sos->hashtable->instrument, es);
+			}
+			break;
+		case T_RecursiveUnion:
+			{
+				RecursiveUnionState *rus = castNode(RecursiveUnionState, planstate);
+
+				if (rus->hashtable)
+					show_tuplehash_info(&rus->hashtable->instrument, es);
+			}
+			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
@@ -2262,21 +2280,26 @@ show_agg_keys(AggState *astate, List *ancestors,
 		ancestors = lcons(plan, ancestors);
 
 		if (plan->groupingSets)
-			show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
-		else
+			show_grouping_sets(astate, plan, ancestors, es);
+		else
+		{
 			show_sort_group_keys(outerPlanState(astate), "Group Key",
 								 plan->numCols, plan->grpColIdx,
 								 NULL, NULL, NULL,
 								 ancestors, es);
+			/* A non-grouping-sets Agg has at most one hash table */
+			Assert(astate->num_hashes <= 1);
+			if (astate->num_hashes)
+				show_tuplehash_info(&astate->perhash[0].hashtable->instrument, es);
+		}
 
 		ancestors = list_delete_first(ancestors);
 	}
 }
 
 static void
-show_grouping_sets(PlanState *planstate, Agg *agg,
+show_grouping_sets(AggState *aggstate, Agg *agg,
 				   List *ancestors, ExplainState *es)
 {
+	PlanState	*planstate = outerPlanState(aggstate);
 	List	   *context;
 	bool		useprefix;
 	ListCell   *lc;
@@ -2289,27 +2312,43 @@ show_grouping_sets(PlanState *planstate, Agg *agg,
 
 	ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);
 
-	show_grouping_set_keys(planstate, agg, NULL,
-						   context, useprefix, ancestors, es);
+	/* XXX: will this show things twice?? */
+	show_grouping_set_keys(aggstate, agg, NULL,
+						   context, useprefix, ancestors, es,
+						   aggstate->num_hashes ? &aggstate->perhash[0].hashtable->instrument : NULL);
 
 	foreach(lc, agg->chain)
 	{
 		Agg		   *aggnode = lfirst(lc);
 		Sort	   *sortnode = (Sort *) aggnode->plan.lefttree;
+		hash_instrumentation *inst;
 
-		show_grouping_set_keys(planstate, aggnode, sortnode,
-							   context, useprefix, ancestors, es);
+		if (aggnode->aggstrategy == AGG_HASHED ||
+			aggnode->aggstrategy == AGG_MIXED)
+		{
+			int			nth = list_cell_number(agg->chain, lc);
+
+			Assert(nth < aggstate->num_hashes);
+			inst = &aggstate->perhash[nth].hashtable->instrument;
+		}
+		else
+			inst = NULL;
+
+		show_grouping_set_keys(aggstate, aggnode, sortnode,
+							   context, useprefix, ancestors, es,
+							   inst);
 	}
 
 	ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es);
 }
 
 static void
-show_grouping_set_keys(PlanState *planstate,
+show_grouping_set_keys(AggState *aggstate,
 					   Agg *aggnode, Sort *sortnode,
 					   List *context, bool useprefix,
-					   List *ancestors, ExplainState *es)
+					   List *ancestors, ExplainState *es,
+					   hash_instrumentation *inst)
 {
+	PlanState	*planstate = outerPlanState(aggstate);
 	Plan	   *plan = planstate->plan;
 	char	   *exprstr;
 	ListCell   *lc;
@@ -2369,6 +2408,10 @@ show_grouping_set_keys(PlanState *planstate,
 			ExplainPropertyText(keyname, "()", es);
 		else
 			ExplainPropertyListNested(keyname, result, es);
+
+		if (aggnode->aggstrategy == AGG_HASHED ||
+			aggnode->aggstrategy == AGG_MIXED)
+			show_tuplehash_info(inst, es);
 	}
 
 	ExplainCloseGroup(keysetname, keysetname, false, es);
@@ -2770,6 +2813,59 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 }
 
 /*
+ * Show hash bucket stats and (optionally, with VERBOSE) memory usage.
+ */
+static void
+show_tuplehash_info(hash_instrumentation *inst, ExplainState *es)
+{
+	long		spacePeakKb_tuples = (inst->space_peak_tuples + 1023) / 1024;
+	long		spacePeakKb_hash = (inst->space_peak_hash + 1023) / 1024;
+
+	if (!es->analyze)
+		return;
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyInteger("Hash Buckets", NULL,
+							   inst->nbuckets, es);
+		ExplainPropertyInteger("Original Hash Buckets", NULL,
+							   inst->nbuckets_original, es);
+		ExplainPropertyInteger("Peak Memory Usage (hashtable)", "kB",
+							   spacePeakKb_hash, es);
+		ExplainPropertyInteger("Peak Memory Usage (tuples)", "kB",
+							   spacePeakKb_tuples, es);
+	}
+	else if (inst->nbuckets)
+	{
+		ExplainIndentText(es);
+		if (inst->nbuckets_original != inst->nbuckets)
+			appendStringInfo(es->str,
+							 "Buckets: %zu (originally %zu)",
+							 inst->nbuckets,
+							 inst->nbuckets_original);
+		else
+			appendStringInfo(es->str,
+							 "Buckets: %zu",
+							 inst->nbuckets);
+
+		if (es->verbose)
+			appendStringInfo(es->str,
+							 "  Memory Usage: hashtable: %ldkB, tuples: %ldkB",
+							 spacePeakKb_hash, spacePeakKb_tuples);
+		appendStringInfoChar(es->str, '\n');
+	}
+}
+
+/*
  * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
  */
 static void
@@ -3436,6 +3532,17 @@ ExplainSubPlans(List *plans, List *ancestors,
 
 		ExplainNode(sps->planstate, ancestors,
 					relationship, sp->plan_name, es);
+		if (sps->hashtable)
+			show_tuplehash_info(&sps->hashtable->instrument, es);
+		if (sps->hashnulls)
+		{
+			ExplainOpenGroup("Null hashtable", "Null hashtable", true, es);
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str, "Null hashtable: ");
+			}
+			show_tuplehash_info(&sps->hashnulls->instrument, es);
+			ExplainCloseGroup("Null hashtable", "Null hashtable", true, es);
+		}
 
 		ancestors = list_delete_first(ancestors);
 	}
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index de0205f..cedb023 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -191,6 +191,7 @@ BuildTupleHashTableExt(PlanState *parent,
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_funcs = NULL;
 	hashtable->cur_eq_func = NULL;
+	memset(&hashtable->instrument, 0, sizeof(hashtable->instrument));
 
 	/*
 	 * If parallelism is in use, even if the master backend is performing the
@@ -206,6 +207,7 @@ BuildTupleHashTableExt(PlanState *parent,
 		hashtable->hash_iv = 0;
 
 	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
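+	/* Seed the instrumentation with the table's initial size */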
+	UpdateTupleHashTableStats(hashtable, true);
 
 	/*
 	 * We copy the input tuple descriptor just for safety --- we assume all
@@ -284,9 +286,32 @@ BuildTupleHashTable(PlanState *parent,
 void
 ResetTupleHashTable(TupleHashTable hashtable)
 {
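+	/* Capture peak stats before the table's contents are discarded */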
+	UpdateTupleHashTableStats(hashtable, false);
 	tuplehash_reset(hashtable->hashtab);
 }
 
+/*
+ * Update the hash table's instrumentation stats, keeping peak values.
+ * Pass initial=true when the table has just been created.
+ */
+void
+UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial)
+{
+	hashtable->instrument.nbuckets = hashtable->hashtab->size;
+	if (initial)
+	{
+		hashtable->instrument.nbuckets_original = hashtable->hashtab->size;
+		hashtable->instrument.space_peak_hash =
+			hashtable->hashtab->size * sizeof(TupleHashEntryData);
+		hashtable->instrument.space_peak_tuples = 0;
+	}
+	else
+	{
+		/* hashtable->entrysize includes additionalsize */
+		hashtable->instrument.space_peak_hash =
+			Max(hashtable->instrument.space_peak_hash,
+				hashtable->hashtab->size * sizeof(TupleHashEntryData));
+		hashtable->instrument.space_peak_tuples =
+			Max(hashtable->instrument.space_peak_tuples,
+				hashtable->hashtab->members * hashtable->entrysize);
+	}
+}
+
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2e9a21b..f90453c 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1704,6 +1704,7 @@ agg_retrieve_direct(AggState *aggstate)
 				 */
 				initialize_phase(aggstate, 0);
 				aggstate->table_filled = true;
+				UpdateTupleHashTableStats(aggstate->perhash[0].hashtable, false);
 				ResetTupleHashIterator(aggstate->perhash[0].hashtable,
 									   &aggstate->perhash[0].hashiter);
 				select_current_set(aggstate, 0, true);
@@ -1901,6 +1902,13 @@ agg_retrieve_direct(AggState *aggstate)
 						}
 					}
 				}
+
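+				/* In mixed aggregation, the hash tables fill during phase 1 */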
+				if (aggstate->aggstrategy == AGG_MIXED &&
+						aggstate->current_phase == 1)
+				{
+					for (int i = 0; i < aggstate->num_hashes; i++)
+						UpdateTupleHashTableStats(aggstate->perhash[i].hashtable, false);
+				}
 			}
 
 			/*
@@ -1975,6 +1983,9 @@ agg_fill_hash_table(AggState *aggstate)
 	}
 
 	aggstate->table_filled = true;
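+	/* All input has been hashed; record the final table stats */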
+	for (int i = 0; i < aggstate->num_hashes; i++)
+		UpdateTupleHashTableStats(aggstate->perhash[i].hashtable, false);
+
 	/* Initialize to walk the first hash table */
 	select_current_set(aggstate, 0, true);
 	ResetTupleHashIterator(aggstate->perhash[0].hashtable,
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 620414a..93272c2 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -156,6 +156,9 @@ ExecRecursiveUnion(PlanState *pstate)
 		return slot;
 	}
 
+	/* No more tuples: record the hash table's final stats */
+	if (node->hashtable)
+		UpdateTupleHashTableStats(node->hashtable, false);
+
 	return NULL;
 }
 
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index bfd148a..9c0e0ab 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -415,6 +415,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 	setopstate->table_filled = true;
+	UpdateTupleHashTableStats(setopstate->hashtable, false);
 	/* Initialize to walk the hash table */
 	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
 }
 
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index ff95317..eec849c 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -621,6 +621,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	ExecClearTuple(node->projRight->pi_state.resultslot);
 
 	MemoryContextSwitchTo(oldcontext);
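+	/* The hash tables are now fully built; capture their stats for EXPLAIN */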
+	UpdateTupleHashTableStats(node->hashtable, false);
+	if (node->hashnulls)
+		UpdateTupleHashTableStats(node->hashnulls, false);
 }
 
 /*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 81fdfa4..34199b5 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -150,6 +150,7 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 ExprState *eqcomp,
 										 FmgrInfo *hashfunctions);
 extern void ResetTupleHashTable(TupleHashTable hashtable);
+extern void UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial);
 
 /*
  * prototypes from functions in execJunk.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index cd3ddf7..504b557 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -691,6 +691,15 @@ typedef struct TupleHashEntryData
 #define SH_DECLARE
 #include "lib/simplehash.h"
 
+/* XXX: not to be confused with struct HashInstrumentation... */
+typedef struct hash_instrumentation
+{
+	size_t	nbuckets;				/* number of buckets at end of execution */
+	size_t	nbuckets_original;		/* planned number of buckets */
+	size_t	space_peak_hash;		/* peak memory used by buckets, in bytes */
+	size_t	space_peak_tuples;		/* peak memory used by tuples, in bytes */
+} hash_instrumentation;
+
 typedef struct TupleHashTableData
 {
 	tuplehash_hash *hashtab;	/* underlying hash table */
@@ -709,6 +718,7 @@ typedef struct TupleHashTableData
 	ExprState  *cur_eq_func;	/* comparator for input vs. table */
 	uint32		hash_iv;		/* hash-function IV */
 	ExprContext *exprcontext;	/* expression context */
+	hash_instrumentation instrument;
 }			TupleHashTableData;
 
 typedef tuplehash_iterator TupleHashIterator;
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index 95d619c..b77c43c 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1133,9 +1133,11 @@ explain (costs off, timing off, summary off, analyze)
 --------------------------------------------------------
  HashAggregate (actual rows=0 loops=1)
    Hash Key: a, b
+   Buckets: 256
    Hash Key: a
+   Buckets: 256
    ->  Seq Scan on gstest_empty (actual rows=0 loops=1)
-(4 rows)
+(6 rows)
 
 select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),());
  a | b | sum | count 
@@ -1157,11 +1159,12 @@ explain (costs off, timing off, summary off, analyze)
 --------------------------------------------------------
  MixedAggregate (actual rows=3 loops=1)
    Hash Key: a, b
+   Buckets: 256
    Group Key: ()
    Group Key: ()
    Group Key: ()
    ->  Seq Scan on gstest_empty (actual rows=0 loops=1)
-(6 rows)
+(7 rows)
 
 select sum(v), count(*) from gstest_empty group by grouping sets ((),(),());
  sum | count 
@@ -1202,9 +1205,11 @@ explain (costs off, timing off, summary off, analyze)
 ---------------------------------------------------
  HashAggregate (actual rows=4 loops=1)
    Hash Key: a, b
+   Buckets: 4 (originally 2)
    Hash Key: a, c
+   Buckets: 4 (originally 2)
    ->  Seq Scan on gstest3 (actual rows=2 loops=1)
-(4 rows)
+(6 rows)
 
 -- simple rescan tests
 select a, b, sum(v.x)
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 94cf969..783c1da 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -306,7 +306,9 @@ explain (costs off, timing off, summary off, analyze)
                        ->  Seq Scan on tenk2 (actual rows=8990 loops=5)
                              Filter: (thousand > 100)
                              Rows Removed by Filter: 1010
-(11 rows)
+                     Buckets: 16384
+                     Null hashtable: Buckets: 1024
+(13 rows)
 
 select count(*) from tenk1 where (two, four) not in
 	(select hundred, thousand from tenk2 where thousand > 100);
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 55991c8..410daa0 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -791,7 +791,9 @@ select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
      ->  Append (actual rows=2 loops=1)
            ->  Result (actual rows=1 loops=1)
            ->  Result (actual rows=1 loops=1)
-(5 rows)
+   Buckets: 4 (originally 2)
+   Null hashtable: Buckets: 2
+(7 rows)
 
 select 'foo'::text in (select 'bar'::name union all select 'bar'::name);
  ?column? 
@@ -999,7 +1001,9 @@ select * from int4_tbl where
    SubPlan 1
      ->  Index Only Scan using tenk1_unique1 on tenk1 a (actual rows=10000 loops=1)
            Heap Fetches: 0
-(8 rows)
+   Buckets: 16384
+   Null hashtable: Buckets: 2
+(10 rows)
 
 select * from int4_tbl where
   (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 5ac1477..65797ee 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -355,13 +355,14 @@ select count(*) from
  Aggregate (actual rows=1 loops=1)
    ->  Subquery Scan on ss (actual rows=5000 loops=1)
          ->  HashSetOp Intersect (actual rows=5000 loops=1)
+               Buckets: 8192
                ->  Append (actual rows=20000 loops=1)
                      ->  Subquery Scan on "*SELECT* 2" (actual rows=10000 loops=1)
                            ->  Seq Scan on tenk1 (actual rows=10000 loops=1)
                      ->  Subquery Scan on "*SELECT* 1" (actual rows=10000 loops=1)
                            ->  Index Only Scan using tenk1_unique1 on tenk1 tenk1_1 (actual rows=10000 loops=1)
                                  Heap Fetches: 0
-(9 rows)
+(10 rows)
 
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -586,12 +587,13 @@ select from generate_series(1,5) intersect select from generate_series(1,3);
                                           QUERY PLAN                                          
 ----------------------------------------------------------------------------------------------
  HashSetOp Intersect (actual rows=1 loops=1)
+   Buckets: 4 (originally 2)
    ->  Append (actual rows=8 loops=1)
          ->  Subquery Scan on "*SELECT* 1" (actual rows=5 loops=1)
                ->  Function Scan on generate_series (actual rows=5 loops=1)
          ->  Subquery Scan on "*SELECT* 2" (actual rows=3 loops=1)
                ->  Function Scan on generate_series generate_series_1 (actual rows=3 loops=1)
-(6 rows)
+(7 rows)
 
 select from generate_series(1,5) union select from generate_series(1,3);
 --
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index 49d44e2..b8b1c8c 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -114,7 +114,7 @@ explain (costs off, timing off, summary off, analyze)
 select count(*) from tenk1 where (two, four) not in
 	(select hundred, thousand from tenk2 where thousand > 100);
 -- this is not parallel-safe due to use of random() within SubLink's testexpr:
-explain (costs off, timing off, summary off, analyze)
+explain (costs off)
 	select * from tenk1 where (unique1 + random())::integer not in
 	(select ten from tenk2);
 alter table tenk2 reset (parallel_workers);
-- 
2.7.4

