From eac1ce2a3b49e0fa54d97695a599c66538407f7a Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 7 Nov 2024 20:15:44 -0800
Subject: [PATCH v2] HashAgg: free hash table memory each iteration.

---
 src/backend/executor/execGrouping.c       | 22 ++++++++++++++---
 src/backend/executor/nodeAgg.c            | 30 +++++++++++++++++++++--
 src/backend/executor/nodeRecursiveunion.c |  2 +-
 src/backend/executor/nodeSetOp.c          |  2 +-
 src/backend/executor/nodeSubplan.c        |  4 +--
 src/include/executor/executor.h           |  2 +-
 6 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 774e4de882..dd5c3e852a 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -279,13 +279,27 @@ BuildTupleHashTable(PlanState *parent,
 
 /*
  * Reset contents of the hashtable to be empty, preserving all the non-content
- * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
- * also be reset, otherwise there will be leaks.
+ * state. If nbuckets is -1, or if the bucket array is already the right size,
+ * then preserve it; otherwise rebuild it. Note that the tablecxt passed to
+ * BuildTupleHashTableExt() should also be reset, otherwise there will be
+ * leaks.
  */
 void
-ResetTupleHashTable(TupleHashTable hashtable)
+ResetTupleHashTable(TupleHashTable hashtable, long nbuckets)
 {
-	tuplehash_reset(hashtable->hashtab);
+	tuplehash_hash *hashtab = hashtable->hashtab;
+
+	if (nbuckets < 0 || nbuckets == hashtable->hashtab->size)
+	{
+		tuplehash_reset(hashtab);
+	}
+	else
+	{
+		MemoryContext metacxt = hashtab->ctx;
+
+		tuplehash_destroy(hashtab);
+		hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
+	}
 }
 
 /*
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 53ead77ece..40900a89bd 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1477,7 +1477,7 @@ build_hash_tables(AggState *aggstate)
 
 		if (perhash->hashtable != NULL)
 		{
-			ResetTupleHashTable(perhash->hashtable);
+			ResetTupleHashTable(perhash->hashtable, -1);
 			continue;
 		}
 
@@ -2623,7 +2623,33 @@ agg_refill_hash_table(AggState *aggstate)
 	/* free memory and reset hash tables */
 	ReScanExprContext(aggstate->hashcontext);
 	for (int setno = 0; setno < aggstate->num_hashes; setno++)
-		ResetTupleHashTable(aggstate->perhash[setno].hashtable);
+	{
+		TupleHashTable hashtable = aggstate->perhash[setno].hashtable;
+
+		if (setno == batch->setno)
+		{
+			/*
+			 * Recalculate nbuckets for this batch. This is important when a
+			 * previous iteration caused the number of buckets to grow to an
+			 * unexpectedly large value, crowding out space for the tuples.
+			 */
+			long		nbuckets;
+
+			nbuckets = hash_choose_num_buckets(aggstate->hashentrysize,
+											   batch->input_card,
+											   aggstate->hash_mem_limit);
+
+			ResetTupleHashTable(hashtable, nbuckets);
+		}
+		else
+		{
+			/*
+			 * Free memory but leave metadata intact by setting nbuckets to
+			 * minimal value.
+			 */
+			ResetTupleHashTable(hashtable, 1);
+		}
+	}
 
 	aggstate->hash_ngroups_current = 0;
 
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 22e7b83b2e..0a67083079 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -328,7 +328,7 @@ ExecReScanRecursiveUnion(RecursiveUnionState *node)
 
 	/* Empty hashtable if needed */
 	if (plan->numCols > 0)
-		ResetTupleHashTable(node->hashtable);
+		ResetTupleHashTable(node->hashtable, -1);
 
 	/* reset processing state */
 	node->recursing = false;
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index a8ac68b482..e9a0bdd36d 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -636,7 +636,7 @@ ExecReScanSetOp(SetOpState *node)
 	/* And rebuild empty hashtable if needed */
 	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
 	{
-		ResetTupleHashTable(node->hashtable);
+		ResetTupleHashTable(node->hashtable, -1);
 		node->table_filled = false;
 	}
 
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 236222d72a..792c3e2494 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -529,7 +529,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 		nbuckets = 1;
 
 	if (node->hashtable)
-		ResetTupleHashTable(node->hashtable);
+		ResetTupleHashTable(node->hashtable, -1);
 	else
 		node->hashtable = BuildTupleHashTableExt(node->parent,
 												 node->descRight,
@@ -557,7 +557,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 		}
 
 		if (node->hashnulls)
-			ResetTupleHashTable(node->hashnulls);
+			ResetTupleHashTable(node->hashnulls, -1);
 		else
 			node->hashnulls = BuildTupleHashTableExt(node->parent,
 													 node->descRight,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 69c3ebff00..f210861ec8 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -161,7 +161,7 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 TupleTableSlot *slot,
 										 ExprState *eqcomp,
 										 FmgrInfo *hashfunctions);
-extern void ResetTupleHashTable(TupleHashTable hashtable);
+extern void ResetTupleHashTable(TupleHashTable hashtable, long nbuckets);
 
 /*
  * prototypes from functions in execJunk.c
-- 
2.34.1

