From c5c006322f4fb49b4da152ac8222babcb1e24651 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 4 Apr 2024 13:30:46 +0300
Subject: [PATCH v8 2/2] Refactorings on top of using streaming read API in
 ANALYZE

This patch includes a couple of refactorings on top of using streaming
read API in ANALYZE. So, this patch should be committed after
'Use streaming read API in ANALYZE' patch is committed.

Refactorings:

- heapam_scan_analyze_next_block() is inlined, its content is moved
  under a main loop in acquire_sample_rows(). Comments are updated
  regarding this change.

- heapam_scan_analyze_next_tuple() takes HeapScanDesc instead of
  TableScanDesc now.

- BlockSampler_HasMore() is removed, BlockSampler_Next() returns an
  InvalidBlockNumber if there are no remaining blocks or no blocks to
  sample.

Author: Melanie Plageman <melanieplageman@gmail.com>
---
 src/include/access/heapam.h              |  4 +-
 src/include/utils/sampling.h             |  1 -
 src/backend/access/heap/heapam_handler.c | 66 ++++++------------------
 src/backend/commands/analyze.c           | 33 +++++++++---
 src/backend/utils/misc/sampling.c        | 10 ++--
 5 files changed, 44 insertions(+), 70 deletions(-)

diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index fbadb9eeea1..8b5fcf4a4a9 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -389,9 +389,7 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 								  struct GlobalVisState *vistest);
 
 /* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-										   ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
+extern bool heapam_scan_analyze_next_tuple(HeapScanDesc scan,
 										   TransactionId OldestXmin,
 										   double *liverows, double *deadrows,
 										   TupleTableSlot *slot);
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index be48ee52bac..fb5d6820a24 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -38,7 +38,6 @@ typedef BlockSamplerData *BlockSampler;
 
 extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
 									 int samplesize, uint32 randseed);
-extern bool BlockSampler_HasMore(BlockSampler bs);
 extern BlockNumber BlockSampler_Next(BlockSampler bs);
 
 /* Reservoir sampling methods */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index a32c69cf034..34d99e30c21 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1054,55 +1054,19 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }
 
 /*
- * Prepare to analyze block returned from streaming object.  If the block returned
- * from streaming object is valid, true is returned; otherwise false is returned.
- * The scan has been started with SO_TYPE_ANALYZE option.
- *
- * This routine holds a buffer pin and lock on the heap page.  They are held
- * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
- * items of the heap page are analyzed.
- */
-bool
-heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
-{
-	HeapScanDesc hscan = (HeapScanDesc) scan;
-
-	/*
-	 * We must maintain a pin on the target page's buffer to ensure that
-	 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
-	 * under us.  Hence, pin the page until we are done looking at it.  We
-	 * also choose to hold sharelock on the buffer throughout --- we could
-	 * release and re-acquire sharelock for each tuple, but since we aren't
-	 * doing much work per tuple, the extra lock traffic is probably better
-	 * avoided.
-	 */
-	hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
-	if (hscan->rs_cbuf == InvalidBuffer)
-		return false;
-
-	LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-	hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
-	hscan->rs_cindex = FirstOffsetNumber;
-	return true;
-}
-
-/*
- * Iterate over tuples in the block selected with
- * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
- * is found, true is returned and a tuple is stored in `slot`.  When no more
- * tuples for sampling, false is returned and the pin and lock acquired by
- * heapam_scan_analyze_next_block() are released.
+ * Iterate over tuples in the block selected to analyze.  If a tuple that's
+ * suitable for sampling is found, true is returned and a tuple is stored in
+ * `slot`.  When no more tuples for sampling, false is returned and the pin
+ * and lock on the current buffer in scan are released.
  *
  * *liverows and *deadrows are incremented according to the encountered
  * tuples.
  */
 bool
-heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+heapam_scan_analyze_next_tuple(HeapScanDesc scan, TransactionId OldestXmin,
 							   double *liverows, double *deadrows,
 							   TupleTableSlot *slot)
 {
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	Page		targpage;
 	OffsetNumber maxoffset;
 	BufferHeapTupleTableSlot *hslot;
@@ -1110,17 +1074,17 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 	hslot = (BufferHeapTupleTableSlot *) slot;
-	targpage = BufferGetPage(hscan->rs_cbuf);
+	targpage = BufferGetPage(scan->rs_cbuf);
 	maxoffset = PageGetMaxOffsetNumber(targpage);
 
 	/* Inner loop over all tuples on the selected page */
-	for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
+	for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
 	{
 		ItemId		itemid;
 		HeapTuple	targtuple = &hslot->base.tupdata;
 		bool		sample_it = false;
 
-		itemid = PageGetItemId(targpage, hscan->rs_cindex);
+		itemid = PageGetItemId(targpage, scan->rs_cindex);
 
 		/*
 		 * We ignore unused and redirect line pointers.  DEAD line pointers
@@ -1135,14 +1099,14 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 			continue;
 		}
 
-		ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
+		ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
 
-		targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
+		targtuple->t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
 		targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
 		targtuple->t_len = ItemIdGetLength(itemid);
 
 		switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
-										 hscan->rs_cbuf))
+										 scan->rs_cbuf))
 		{
 			case HEAPTUPLE_LIVE:
 				sample_it = true;
@@ -1222,8 +1186,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 
 		if (sample_it)
 		{
-			ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
-			hscan->rs_cindex++;
+			ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+			scan->rs_cindex++;
 
 			/* note that we leave the buffer locked here! */
 			return true;
@@ -1231,8 +1195,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	}
 
 	/* Now release the lock and pin on the page */
-	UnlockReleaseBuffer(hscan->rs_cbuf);
-	hscan->rs_cbuf = InvalidBuffer;
+	UnlockReleaseBuffer(scan->rs_cbuf);
+	scan->rs_cbuf = InvalidBuffer;
 
 	/* also prevent old slot contents from having pin on page */
 	ExecClearTuple(slot);
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 764520d5aa2..ae4f13be572 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1111,9 +1111,7 @@ block_sampling_streaming_read_next(ReadStream *stream,
 								   void *user_data,
 								   void *per_buffer_data)
 {
-	BlockSamplerData *bs = user_data;
-
-	return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
+	return BlockSampler_Next(user_data);
 }
 
 /*
@@ -1165,7 +1163,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	BlockSamplerData bs;
 	ReservoirStateData rstate;
 	TupleTableSlot *slot;
-	TableScanDesc scan;
+	HeapScanDesc scan;
 	BlockNumber nblocks;
 	BlockNumber blksdone = 0;
 	ReadStream *stream;
@@ -1188,12 +1186,12 @@ acquire_sample_rows(Relation onerel, int elevel,
 	/* Prepare for sampling rows */
 	reservoir_init_selection_state(&rstate, targrows);
 
-	scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
+	scan = (HeapScanDesc) heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
 	slot = table_slot_create(onerel, NULL);
 
 	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
 										vac_strategy,
-										scan->rs_rd,
+										onerel,
 										MAIN_FORKNUM,
 										block_sampling_streaming_read_next,
 										&bs,
@@ -1204,9 +1202,28 @@ acquire_sample_rows(Relation onerel, int elevel,
 	{
 		vacuum_delay_point();
 
-		if (!heapam_scan_analyze_next_block(scan, stream))
+		/*
+		 * We must maintain a pin on the target page's buffer to ensure that
+		 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out
+		 * from under us.  Hence, pin the page until we are done looking at
+		 * it.
+		 */
+		scan->rs_cbuf = read_stream_next_buffer(stream, NULL);
+
+		if (!BufferIsValid(scan->rs_cbuf))
 			break;
 
+		/*
+		 * We choose to hold sharelock on the buffer throughout --- we could
+		 * release and re-acquire sharelock for each tuple, but since we
+		 * aren't doing much work per tuple, the extra lock traffic is
+		 * probably better avoided.
+		 */
+		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+		scan->rs_cblock = BufferGetBlockNumber(scan->rs_cbuf);
+		scan->rs_cindex = FirstOffsetNumber;
+
 		while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
 		{
 			/*
@@ -1258,7 +1275,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	read_stream_end(stream);
 
 	ExecDropSingleTupleTableSlot(slot);
-	heap_endscan(scan);
+	heap_endscan(&scan->rs_base);
 
 	/*
 	 * If we didn't find as many tuples as we wanted then we're done. No sort
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 933db06702c..cae89e331fb 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -54,12 +54,6 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
 	return Min(bs->n, bs->N);
 }
 
-bool
-BlockSampler_HasMore(BlockSampler bs)
-{
-	return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
 BlockNumber
 BlockSampler_Next(BlockSampler bs)
 {
@@ -68,7 +62,9 @@ BlockSampler_Next(BlockSampler bs)
 	double		p;				/* probability to skip block */
 	double		V;				/* random */
 
-	Assert(BlockSampler_HasMore(bs));	/* hence K > 0 and k > 0 */
+	/* no remaining blocks or no blocks to sample */
+	if (K <= 0 || k <= 0)
+		return InvalidBlockNumber;
 
 	if ((BlockNumber) k >= K)
 	{
-- 
2.43.0

