From 0b431ec837f00f852e28b7b5e8e59dbdf61af27f Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 28 Nov 2023 18:38:32 +0100
Subject: [PATCH v20231128 3/4] use per-range memory context for merging in
 leader

---
 src/backend/access/brin/brin.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 0c6ca1ac18c..8d96d2ac9be 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -2475,6 +2475,8 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 	BrinTuple  *emptyTuple = NULL;
 	Size		emptySize;
 	BrinSpool  *spool;
+	MemoryContext	rangeCxt,
+					oldCxt;
 
 	/* Shutdown worker processes */
 	WaitForParallelWorkersToFinish(brinleader->pcxt);
@@ -2496,6 +2498,19 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 	 */
 	memtuple = brin_new_memtuple(state->bs_bdesc);
 
+	/*
+	 * Create a memory context we'll reset to combine results for a single
+	 * page range (received from the workers). We don't expect a huge
+	 * number of overlaps under regular circumstances (because for large
+	 * tables the chunk size is likely larger than the BRIN page range),
+	 * but it can happen, and the union functions may do all kinds of
+	 * stuff. So we'd better reset the context once in a while.
+	 */
+	rangeCxt = AllocSetContextCreate(CurrentMemoryContext,
+									 "brin union",
+									 ALLOCSET_DEFAULT_SIZES);
+	oldCxt = MemoryContextSwitchTo(rangeCxt);
+
 	/*
 	 * Read the BRIN tuples from the shared tuplesort, sorted by block number.
 	 * That probably gives us an index that is cheaper to scan, thanks to mostly
@@ -2555,8 +2570,12 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 			brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
 						  &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
 
-			/* free the formed on-disk tuple */
-			pfree(tmp);
+			/*
+			 * Reset the per-output-range context. This frees all the memory
+			 * possibly allocated by the union functions, and also the BRIN
+			 * tuple we just formed and inserted.
+			 */
+			MemoryContextReset(rangeCxt);
 
 			memtuple = brin_deform_tuple(state->bs_bdesc, btup, memtuple);
 
@@ -2593,7 +2612,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 
 	tuplesort_end(spool->sortstate);
 
-	/* Fill empty ranges for all ranges missing in the tuplesort. */
+	/* Fill empty ranges at the end, for all ranges missing in the tuplesort. */
 	prevblkno = (prevblkno == InvalidBlockNumber) ? 0 : prevblkno;
 	while (prevblkno + state->bs_pagesPerRange < memtuple->bt_blkno)
 	{
@@ -2618,7 +2637,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 					  emptyTuple->bt_blkno, emptyTuple, emptySize);
 	}
 
-	/**/
+	/* Insert the BRIN tuple for the last page range, if there is one. */
 	if (prevblkno != InvalidBlockNumber)
 	{
 		BrinTuple  *tmp;
@@ -2633,6 +2652,13 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
 		pfree(tmp);
 	}
 
+	/*
+	 * Switch back to the original memory context, and destroy the one we
+	 * created to isolate the union_tuple calls.
+	 */
+	MemoryContextSwitchTo(oldCxt);
+	MemoryContextDelete(rangeCxt);
+
 	/*
 	 * Next, accumulate WAL usage.  (This must wait for the workers to finish,
 	 * or we might get incomplete data.)
-- 
2.42.0

