From d75cfac7a5ca380ad86569a65b678390b9768d4e Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 3 Apr 2024 16:28:46 -0400
Subject: [PATCH v7 2/2] some ideas

---
 src/backend/access/heap/heapam_handler.c | 57 +++++-------------------
 src/backend/commands/analyze.c           | 21 +++++----
 src/backend/utils/misc/sampling.c        |  9 +---
 src/include/access/heapam.h              |  4 +-
 src/include/utils/sampling.h             |  1 -
 5 files changed, 27 insertions(+), 65 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index d83fbbe6af3..aab392f50e5 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1053,40 +1053,6 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 	pfree(isnull);
 }
 
-/*
- * Prepare to analyze block returned from streaming object.  If the block returned
- * from streaming object is valid, true is returned; otherwise false is returned.
- * The scan has been started with SO_TYPE_ANALYZE option.
- *
- * This routine holds a buffer pin and lock on the heap page.  They are held
- * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
- * items of the heap page are analyzed.
- */
-bool
-heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
-{
-	HeapScanDesc hscan = (HeapScanDesc) scan;
-
-	/*
-	 * We must maintain a pin on the target page's buffer to ensure that
-	 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
-	 * under us.  Hence, pin the page until we are done looking at it.  We
-	 * also choose to hold sharelock on the buffer throughout --- we could
-	 * release and re-acquire sharelock for each tuple, but since we aren't
-	 * doing much work per tuple, the extra lock traffic is probably better
-	 * avoided.
-	 */
-	hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
-	if (hscan->rs_cbuf == InvalidBuffer)
-		return false;
-
-	LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-	hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
-	hscan->rs_cindex = FirstOffsetNumber;
-	return true;
-}
-
 /*
  * Iterate over tuples in the block selected with
  * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
@@ -1098,11 +1064,10 @@ heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
  * tuples.
  */
 bool
-heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+heapam_scan_analyze_next_tuple(HeapScanDesc scan, TransactionId OldestXmin,
 							   double *liverows, double *deadrows,
 							   TupleTableSlot *slot)
 {
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	Page		targpage;
 	OffsetNumber maxoffset;
 	BufferHeapTupleTableSlot *hslot;
@@ -1110,17 +1075,17 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 	hslot = (BufferHeapTupleTableSlot *) slot;
-	targpage = BufferGetPage(hscan->rs_cbuf);
+	targpage = BufferGetPage(scan->rs_cbuf);
 	maxoffset = PageGetMaxOffsetNumber(targpage);
 
 	/* Inner loop over all tuples on the selected page */
-	for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
+	for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
 	{
 		ItemId		itemid;
 		HeapTuple	targtuple = &hslot->base.tupdata;
 		bool		sample_it = false;
 
-		itemid = PageGetItemId(targpage, hscan->rs_cindex);
+		itemid = PageGetItemId(targpage, scan->rs_cindex);
 
 		/*
 		 * We ignore unused and redirect line pointers.  DEAD line pointers
@@ -1135,14 +1100,14 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 			continue;
 		}
 
-		ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
+		ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
 
-		targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
+		targtuple->t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
 		targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
 		targtuple->t_len = ItemIdGetLength(itemid);
 
 		switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
-										 hscan->rs_cbuf))
+										 scan->rs_cbuf))
 		{
 			case HEAPTUPLE_LIVE:
 				sample_it = true;
@@ -1222,8 +1187,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 
 		if (sample_it)
 		{
-			ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
-			hscan->rs_cindex++;
+			ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+			scan->rs_cindex++;
 
 			/* note that we leave the buffer locked here! */
 			return true;
@@ -1231,8 +1196,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	}
 
 	/* Now release the lock and pin on the page */
-	UnlockReleaseBuffer(hscan->rs_cbuf);
-	hscan->rs_cbuf = InvalidBuffer;
+	UnlockReleaseBuffer(scan->rs_cbuf);
+	scan->rs_cbuf = InvalidBuffer;
 
 	/* also prevent old slot contents from having pin on page */
 	ExecClearTuple(slot);
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 764520d5aa2..7002ece8be8 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1111,9 +1111,7 @@ block_sampling_streaming_read_next(ReadStream *stream,
 								   void *user_data,
 								   void *per_buffer_data)
 {
-	BlockSamplerData *bs = user_data;
-
-	return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
+	return BlockSampler_Next(user_data);
 }
 
 /*
@@ -1165,7 +1163,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	BlockSamplerData bs;
 	ReservoirStateData rstate;
 	TupleTableSlot *slot;
-	TableScanDesc scan;
+	HeapScanDesc scan;
 	BlockNumber nblocks;
 	BlockNumber blksdone = 0;
 	ReadStream *stream;
@@ -1188,12 +1186,12 @@ acquire_sample_rows(Relation onerel, int elevel,
 	/* Prepare for sampling rows */
 	reservoir_init_selection_state(&rstate, targrows);
 
-	scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
+	scan = (HeapScanDesc) heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
 	slot = table_slot_create(onerel, NULL);
 
 	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
 										vac_strategy,
-										scan->rs_rd,
+										onerel,
 										MAIN_FORKNUM,
 										block_sampling_streaming_read_next,
 										&bs,
@@ -1204,9 +1202,16 @@ acquire_sample_rows(Relation onerel, int elevel,
 	{
 		vacuum_delay_point();
 
-		if (!heapam_scan_analyze_next_block(scan, stream))
+		scan->rs_cbuf = read_stream_next_buffer(stream, NULL);
+
+		if (!BufferIsValid(scan->rs_cbuf))
 			break;
 
+		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+		scan->rs_cblock = BufferGetBlockNumber(scan->rs_cbuf);
+		scan->rs_cindex = FirstOffsetNumber;
+
 		while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
 		{
 			/*
@@ -1258,7 +1263,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	read_stream_end(stream);
 
 	ExecDropSingleTupleTableSlot(slot);
-	heap_endscan(scan);
+	heap_endscan(&scan->rs_base);
 
 	/*
 	 * If we didn't find as many tuples as we wanted then we're done. No sort
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 933db06702c..69b73f5b115 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -54,12 +54,6 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
 	return Min(bs->n, bs->N);
 }
 
-bool
-BlockSampler_HasMore(BlockSampler bs)
-{
-	return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
 BlockNumber
 BlockSampler_Next(BlockSampler bs)
 {
@@ -68,7 +62,8 @@ BlockSampler_Next(BlockSampler bs)
 	double		p;				/* probability to skip block */
 	double		V;				/* random */
 
-	Assert(BlockSampler_HasMore(bs));	/* hence K > 0 and k > 0 */
+	if (bs->t >= bs->N || bs->m >= bs->n)
+		return InvalidBlockNumber;
 
 	if ((BlockNumber) k >= K)
 	{
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 633caee9d95..f76dd18ad5d 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -389,9 +389,7 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 								  struct GlobalVisState *vistest);
 
 /* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-										   ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
+extern bool heapam_scan_analyze_next_tuple(HeapScanDesc scan,
 										   TransactionId OldestXmin,
 										   double *liverows, double *deadrows,
 										   TupleTableSlot *slot);
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index be48ee52bac..fb5d6820a24 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -38,7 +38,6 @@ typedef BlockSamplerData *BlockSampler;
 
 extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
 									 int samplesize, uint32 randseed);
-extern bool BlockSampler_HasMore(BlockSampler bs);
 extern BlockNumber BlockSampler_Next(BlockSampler bs);
 
 /* Reservoir sampling methods */
-- 
2.40.1

