From 5ccc64748fc60e10fdceebf4430b826bac81731a Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 13 Feb 2024 10:17:47 -0500
Subject: [PATCH v20 3/3] Make table_scan_bitmap_next_block() async-friendly

table_scan_bitmap_next_block() previously returned false when
table_scan_bitmap_next_tuple() should not be called for the tuples on
the page. This could happen when there were no visible tuples on the
page or when, due to concurrent activity on the table, the block
returned by the iterator is past the end of the table (as recorded when
the scan started).

That forced the caller to be responsible for determining if additional
blocks should be fetched and for invoking table_scan_bitmap_next_block()
for these blocks.

It makes more sense for table_scan_bitmap_next_block() to be responsible
for finding a block that is not past the end of the table (as of the
beginning of the scan) and for table_scan_bitmap_next_tuple() to return
false if there are no visible tuples on the page.

This also allows us to move responsibility for the iterator to table AM
specific code. This means handling invalid blocks is entirely up to the
table AM.

Author: Melanie Plageman
Reviewed-by: Tomas Vondra, Andres Freund, Heikki Linnakangas
Discussion: https://postgr.es/m/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
---
 src/backend/access/heap/heapam_handler.c  |  58 +++++--
 src/backend/executor/nodeBitmapHeapscan.c | 198 ++++++++++------------
 src/include/access/relscan.h              |   7 +
 src/include/access/tableam.h              |  68 +++++---
 src/include/nodes/execnodes.h             |  12 +-
 5 files changed, 199 insertions(+), 144 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index f99e6efd757..903cb80157e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2188,18 +2188,52 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 
 static bool
 heapam_scan_bitmap_next_block(TableScanDesc scan,
-							  TBMIterateResult *tbmres,
+							  BlockNumber *blockno, bool *recheck,
 							  long *lossy_pages, long *exact_pages)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
-	BlockNumber block = tbmres->blockno;
+	BlockNumber block;
 	Buffer		buffer;
 	Snapshot	snapshot;
 	int			ntup;
+	TBMIterateResult *tbmres;
 
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
 
+	*blockno = InvalidBlockNumber;
+	*recheck = true;
+
+	do
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		if (scan->shared_tbmiterator)
+			tbmres = tbm_shared_iterate(scan->shared_tbmiterator);
+		else
+			tbmres = tbm_iterate(scan->tbmiterator);
+
+		if (tbmres == NULL)
+		{
+			/* no more entries in the bitmap */
+			Assert(hscan->rs_empty_tuples_pending == 0);
+			return false;
+		}
+
+		/*
+		 * Ignore any claimed entries past what we think is the end of the
+		 * relation. It may have been extended after the start of our scan (we
+		 * only hold an AccessShareLock, and it could be inserts from this
+		 * backend).  We don't take this optimization in SERIALIZABLE
+		 * isolation though, as we need to examine all invisible tuples
+		 * reachable by the index.
+		 */
+	} while (!IsolationIsSerializable() && tbmres->blockno >= hscan->rs_nblocks);
+
+	/* Got a valid block */
+	*blockno = tbmres->blockno;
+	*recheck = tbmres->recheck;
+
 	/*
 	 * We can skip fetching the heap page if we don't need any fields from the
 	 * heap, the bitmap entries don't need rechecking, and all tuples on the
@@ -2218,16 +2252,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		return true;
 	}
 
-	/*
-	 * Ignore any claimed entries past what we think is the end of the
-	 * relation. It may have been extended after the start of our scan (we
-	 * only hold an AccessShareLock, and it could be inserts from this
-	 * backend).  We don't take this optimization in SERIALIZABLE isolation
-	 * though, as we need to examine all invisible tuples reachable by the
-	 * index.
-	 */
-	if (!IsolationIsSerializable() && block >= hscan->rs_nblocks)
-		return false;
+	block = tbmres->blockno;
 
 	/*
 	 * Acquire pin on the target heap page, trading in any pin we held before.
@@ -2322,7 +2347,14 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	else
 		(*exact_pages)++;
 
-	return ntup > 0;
+	/*
+	 * Return true to indicate that a valid block was found and the bitmap is
+	 * not exhausted. If there are no visible tuples on this page,
+	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
+	 * return false returning control to this function to advance to the next
+	 * block in the bitmap.
+	 */
+	return true;
 }
 
 static bool
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index e63f6f0429a..01703bd4865 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,8 +51,7 @@
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-												BlockNumber blockno);
+static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
 static inline void BitmapPrefetch(BitmapHeapScanState *node,
 								  TableScanDesc scan);
@@ -71,9 +70,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	ExprContext *econtext;
 	TableScanDesc scan;
 	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator = NULL;
-	TBMSharedIterator *shared_tbmiterator = NULL;
-	TBMIterateResult *tbmres;
 	TupleTableSlot *slot;
 	ParallelBitmapHeapState *pstate = node->pstate;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
@@ -85,11 +81,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	slot = node->ss.ss_ScanTupleSlot;
 	scan = node->ss.ss_currentScanDesc;
 	tbm = node->tbm;
-	if (pstate == NULL)
-		tbmiterator = node->tbmiterator;
-	else
-		shared_tbmiterator = node->shared_tbmiterator;
-	tbmres = node->tbmres;
 
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
@@ -105,6 +96,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	 */
 	if (!node->initialized)
 	{
+		TBMIterator *tbmiterator = NULL;
+		TBMSharedIterator *shared_tbmiterator = NULL;
+
 		if (!pstate)
 		{
 			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
@@ -113,8 +107,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				elog(ERROR, "unrecognized result from subplan");
 
 			node->tbm = tbm;
-			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
-			node->tbmres = tbmres = NULL;
+			tbmiterator = tbm_begin_iterate(tbm);
 
 #ifdef USE_PREFETCH
 			if (node->prefetch_maximum > 0)
@@ -166,9 +159,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			}
 
 			/* Allocate a private iterator and attach the shared state to it */
-			node->shared_tbmiterator = shared_tbmiterator =
-				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
-			node->tbmres = tbmres = NULL;
+			shared_tbmiterator = tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
 
 #ifdef USE_PREFETCH
 			if (node->prefetch_maximum > 0)
@@ -207,46 +198,23 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			node->ss.ss_currentScanDesc = scan;
 		}
 
+		scan->tbmiterator = tbmiterator;
+		scan->shared_tbmiterator = shared_tbmiterator;
 		node->initialized = true;
+
+		goto new_page;
 	}
 
 	for (;;)
 	{
-		CHECK_FOR_INTERRUPTS();
-
-		/*
-		 * Get next page of results if needed
-		 */
-		if (tbmres == NULL)
-		{
-			if (!pstate)
-				node->tbmres = tbmres = tbm_iterate(tbmiterator);
-			else
-				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
-			if (tbmres == NULL)
-			{
-				/* no more entries in the bitmap */
-				break;
-			}
-
-			BitmapAdjustPrefetchIterator(node, tbmres->blockno);
-
-			if (!table_scan_bitmap_next_block(scan, tbmres,
-											  &node->lossy_pages, &node->exact_pages))
-			{
-				/* AM doesn't think this block is valid, skip */
-				continue;
-			}
-
-			/* Adjust the prefetch target */
-			BitmapAdjustPrefetchTarget(node);
-		}
-		else
+		while (table_scan_bitmap_next_tuple(scan, slot))
 		{
 			/*
 			 * Continuing in previously obtained page.
 			 */
 
+			CHECK_FOR_INTERRUPTS();
+
 #ifdef USE_PREFETCH
 
 			/*
@@ -267,45 +235,56 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				SpinLockRelease(&pstate->mutex);
 			}
 #endif							/* USE_PREFETCH */
-		}
 
-		/*
-		 * We issue prefetch requests *after* fetching the current page to try
-		 * to avoid having prefetching interfere with the main I/O. Also, this
-		 * should happen only when we have determined there is still something
-		 * to do on the current page, else we may uselessly prefetch the same
-		 * page we are just about to request for real.
-		 */
-		BitmapPrefetch(node, scan);
+			/*
+			 * We issue prefetch requests *after* fetching the current page to
+			 * try to avoid having prefetching interfere with the main I/O.
+			 * Also, this should happen only when we have determined there is
+			 * still something to do on the current page, else we may
+			 * uselessly prefetch the same page we are just about to request
+			 * for real.
+			 */
+			BitmapPrefetch(node, scan);
 
-		/*
-		 * Attempt to fetch tuple from AM.
-		 */
-		if (!table_scan_bitmap_next_tuple(scan, slot))
-		{
-			/* nothing more to look at on this page */
-			node->tbmres = tbmres = NULL;
-			continue;
+			/*
+			 * If we are using lossy info, we have to recheck the qual
+			 * conditions at every tuple.
+			 */
+			if (node->recheck)
+			{
+				econtext->ecxt_scantuple = slot;
+				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
+				{
+					/* Fails recheck, so drop it and loop back for another */
+					InstrCountFiltered2(node, 1);
+					ExecClearTuple(slot);
+					continue;
+				}
+			}
+
+			/* OK to return this tuple */
+			return slot;
 		}
 
+new_page:
+
+		BitmapAdjustPrefetchIterator(node);
+
+		if (!table_scan_bitmap_next_block(scan, &node->blockno, &node->recheck,
+										  &node->lossy_pages, &node->exact_pages))
+			break;
+
 		/*
-		 * If we are using lossy info, we have to recheck the qual conditions
-		 * at every tuple.
+		 * If serial, we can error out if the the prefetch block doesn't stay
+		 * ahead of the current block.
 		 */
-		if (tbmres->recheck)
-		{
-			econtext->ecxt_scantuple = slot;
-			if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-			{
-				/* Fails recheck, so drop it and loop back for another */
-				InstrCountFiltered2(node, 1);
-				ExecClearTuple(slot);
-				continue;
-			}
-		}
+		if (node->pstate == NULL &&
+			node->prefetch_iterator &&
+			node->pfblockno < node->blockno)
+			elog(ERROR, "prefetch and main iterators are out of sync");
 
-		/* OK to return this tuple */
-		return slot;
+		/* Adjust the prefetch target */
+		BitmapAdjustPrefetchTarget(node);
 	}
 
 	/*
@@ -331,13 +310,17 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 
 /*
  *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
+ *
+ *	We keep track of how far the prefetch iterator is ahead of the main
+ *	iterator in prefetch_pages. For each block the main iterator returns, we
+ *	decrement prefetch_pages.
  */
 static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-							 BlockNumber blockno)
+BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
 {
 #ifdef USE_PREFETCH
 	ParallelBitmapHeapState *pstate = node->pstate;
+	TBMIterateResult *tbmpre;
 
 	if (pstate == NULL)
 	{
@@ -351,14 +334,21 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
 		else if (prefetch_iterator)
 		{
 			/* Do not let the prefetch iterator get behind the main one */
-			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-
-			if (tbmpre == NULL || tbmpre->blockno != blockno)
-				elog(ERROR, "prefetch and main iterators are out of sync");
+			tbmpre = tbm_iterate(prefetch_iterator);
+			node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
 		}
 		return;
 	}
 
+	/*
+	 * XXX: There is a known issue with keeping the prefetch and current block
+	 * iterators in sync for parallel bitmapheapscans. This can lead to
+	 * prefetching blocks that have already been read. See the discussion
+	 * here:
+	 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
+	 * Note that moving the call site of BitmapAdjustPrefetchIterator()
+	 * exacerbates the effects of this bug.
+	 */
 	if (node->prefetch_maximum > 0)
 	{
 		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
@@ -383,7 +373,10 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
 			 * case.
 			 */
 			if (prefetch_iterator)
-				tbm_shared_iterate(prefetch_iterator);
+			{
+				tbmpre = tbm_shared_iterate(prefetch_iterator);
+				node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
+			}
 		}
 	}
 #endif							/* USE_PREFETCH */
@@ -461,6 +454,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 					break;
 				}
 				node->prefetch_pages++;
+				node->pfblockno = tbmpre->blockno;
 
 				/*
 				 * If we expect not to have to actually read this heap page,
@@ -518,6 +512,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 					break;
 				}
 
+				node->pfblockno = tbmpre->blockno;
+
 				/* As above, skip prefetch if we expect not to need page */
 				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
 							  !tbmpre->recheck &&
@@ -579,12 +575,8 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		table_rescan(node->ss.ss_currentScanDesc, NULL);
 
 	/* release bitmaps and buffers if any */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
 	if (node->prefetch_iterator)
 		tbm_end_iterate(node->prefetch_iterator);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
@@ -592,13 +584,13 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
-	node->tbmiterator = NULL;
-	node->tbmres = NULL;
 	node->prefetch_iterator = NULL;
 	node->initialized = false;
-	node->shared_tbmiterator = NULL;
 	node->shared_prefetch_iterator = NULL;
 	node->pvmbuffer = InvalidBuffer;
+	node->recheck = true;
+	node->blockno = InvalidBlockNumber;
+	node->pfblockno = InvalidBlockNumber;
 
 	ExecScanReScan(&node->ss);
 
@@ -629,28 +621,24 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 	 */
 	ExecEndNode(outerPlanState(node));
 
+
+	/*
+	 * close heap scan
+	 */
+	if (scanDesc)
+		table_endscan(scanDesc);
+
 	/*
 	 * release bitmaps and buffers if any
 	 */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
 	if (node->prefetch_iterator)
 		tbm_end_iterate(node->prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
-
-	/*
-	 * close heap scan
-	 */
-	if (scanDesc)
-		table_endscan(scanDesc);
-
 }
 
 /* ----------------------------------------------------------------
@@ -683,8 +671,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
 	scanstate->tbm = NULL;
-	scanstate->tbmiterator = NULL;
-	scanstate->tbmres = NULL;
 	scanstate->pvmbuffer = InvalidBuffer;
 	scanstate->exact_pages = 0;
 	scanstate->lossy_pages = 0;
@@ -692,9 +678,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->prefetch_pages = 0;
 	scanstate->prefetch_target = 0;
 	scanstate->initialized = false;
-	scanstate->shared_tbmiterator = NULL;
 	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
+	scanstate->recheck = true;
+	scanstate->blockno = InvalidBlockNumber;
+	scanstate->pfblockno = InvalidBlockNumber;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 521043304ab..024dc08c420 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -24,6 +24,9 @@
 
 struct ParallelTableScanDescData;
 
+struct TBMIterator;
+struct TBMSharedIterator;
+
 /*
  * Generic descriptor for table scans. This is the base-class for table scans,
  * which needs to be embedded in the scans of individual AMs.
@@ -36,6 +39,10 @@ typedef struct TableScanDescData
 	int			rs_nkeys;		/* number of scan keys */
 	struct ScanKeyData *rs_key; /* array of scan key descriptors */
 
+	/* Iterators for Bitmap Table Scans */
+	struct TBMIterator *tbmiterator;
+	struct TBMSharedIterator *shared_tbmiterator;
+
 	/* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
 	ItemPointerData rs_mintid;
 	ItemPointerData rs_maxtid;
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 697b5488b10..68a479496d7 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -22,6 +22,7 @@
 #include "access/xact.h"
 #include "commands/vacuum.h"
 #include "executor/tuptable.h"
+#include "nodes/tidbitmap.h"
 #include "utils/rel.h"
 #include "utils/snapshot.h"
 
@@ -773,19 +774,14 @@ typedef struct TableAmRoutine
 	 */
 
 	/*
-	 * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
-	 * of a bitmap table scan. `scan` was started via table_beginscan_bm().
-	 * Return false if there are no tuples to be found on the page, true
-	 * otherwise.
+	 * Prepare to fetch / check / return tuples from `blockno` as part of a
+	 * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
+	 * false if the bitmap is exhausted and true otherwise.
 	 *
 	 * This will typically read and pin the target block, and do the necessary
 	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
 	 * make sense to perform tuple visibility checks at this time).
 	 *
-	 * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
-	 * on the page have to be returned, otherwise the tuples at offsets in
-	 * `tbmres->offsets` need to be returned.
-	 *
 	 * lossy_pages is incremented if the bitmap is lossy for the selected
 	 * block; otherwise, exact_pages is incremented.
 	 *
@@ -804,7 +800,7 @@ typedef struct TableAmRoutine
 	 * scan_bitmap_next_tuple need to exist, or neither.
 	 */
 	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
-										   struct TBMIterateResult *tbmres,
+										   BlockNumber *blockno, bool *recheck,
 										   long *lossy_pages, long *exact_pages);
 
 	/*
@@ -942,12 +938,16 @@ static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
 				   int nkeys, struct ScanKeyData *key, bool need_tuple)
 {
+	TableScanDesc result;
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
 	if (need_tuple)
 		flags |= SO_NEED_TUPLES;
 
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+	result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+	result->shared_tbmiterator = NULL;
+	result->tbmiterator = NULL;
+	return result;
 }
 
 /*
@@ -994,6 +994,21 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
 static inline void
 table_endscan(TableScanDesc scan)
 {
+	if (scan->rs_flags & SO_TYPE_BITMAPSCAN)
+	{
+		if (scan->shared_tbmiterator)
+		{
+			tbm_end_shared_iterate(scan->shared_tbmiterator);
+			scan->shared_tbmiterator = NULL;
+		}
+
+		if (scan->tbmiterator)
+		{
+			tbm_end_iterate(scan->tbmiterator);
+			scan->tbmiterator = NULL;
+		}
+	}
+
 	scan->rs_rd->rd_tableam->scan_end(scan);
 }
 
@@ -1004,6 +1019,21 @@ static inline void
 table_rescan(TableScanDesc scan,
 			 struct ScanKeyData *key)
 {
+	if (scan->rs_flags & SO_TYPE_BITMAPSCAN)
+	{
+		if (scan->shared_tbmiterator)
+		{
+			tbm_end_shared_iterate(scan->shared_tbmiterator);
+			scan->shared_tbmiterator = NULL;
+		}
+
+		if (scan->tbmiterator)
+		{
+			tbm_end_iterate(scan->tbmiterator);
+			scan->tbmiterator = NULL;
+		}
+	}
+
 	scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
 }
 
@@ -1967,19 +1997,18 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
  */
 
 /*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of a
- * bitmap table scan. `scan` needs to have been started via
- * table_beginscan_bm(). Returns false if there are no tuples to be found on
- * the page, true otherwise. lossy_pages is incremented is the block's
- * representation in the bitmap is lossy; otherwise, exact_pages is
- * incremented.
+ * Prepare to fetch / check / return tuples as part of a bitmap table scan.
+ * `scan` needs to have been started via table_beginscan_bm(). Returns false if
+ * there are no more blocks in the bitmap, true otherwise. lossy_pages is
+ * incremented is the block's representation in the bitmap is lossy; otherwise,
+ * exact_pages is incremented.
  *
  * Note, this is an optionally implemented function, therefore should only be
  * used after verifying the presence (at plan time or such).
  */
 static inline bool
 table_scan_bitmap_next_block(TableScanDesc scan,
-							 struct TBMIterateResult *tbmres,
+							 BlockNumber *blockno, bool *recheck,
 							 long *lossy_pages,
 							 long *exact_pages)
 {
@@ -1992,9 +2021,8 @@ table_scan_bitmap_next_block(TableScanDesc scan,
 		elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
-														   tbmres,
-														   lossy_pages,
-														   exact_pages);
+														   blockno, recheck,
+														   lossy_pages, exact_pages);
 }
 
 /*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index fa2f70b7a48..d96703b04d4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1792,8 +1792,6 @@ typedef struct ParallelBitmapHeapState
  *
  *		bitmapqualorig	   execution state for bitmapqualorig expressions
  *		tbm				   bitmap obtained from child index scan(s)
- *		tbmiterator		   iterator for scanning current pages
- *		tbmres			   current-page data
  *		pvmbuffer		   buffer for visibility-map lookups of prefetched pages
  *		exact_pages		   total number of exact pages retrieved
  *		lossy_pages		   total number of lossy pages retrieved
@@ -1802,9 +1800,11 @@ typedef struct ParallelBitmapHeapState
  *		prefetch_target    current target prefetch distance
  *		prefetch_maximum   maximum value for prefetch_target
  *		initialized		   is node is ready to iterate
- *		shared_tbmiterator	   shared iterator
  *		shared_prefetch_iterator shared iterator for prefetching
  *		pstate			   shared state for parallel bitmap scan
+ *		recheck			   do current page's tuples need recheck
+ *		blockno			   used to validate pf and current block in sync
+ *		pfblockno		   used to validate pf stays ahead of current block
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1812,8 +1812,6 @@ typedef struct BitmapHeapScanState
 	ScanState	ss;				/* its first field is NodeTag */
 	ExprState  *bitmapqualorig;
 	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator;
-	TBMIterateResult *tbmres;
 	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
@@ -1822,9 +1820,11 @@ typedef struct BitmapHeapScanState
 	int			prefetch_target;
 	int			prefetch_maximum;
 	bool		initialized;
-	TBMSharedIterator *shared_tbmiterator;
 	TBMSharedIterator *shared_prefetch_iterator;
 	ParallelBitmapHeapState *pstate;
+	bool		recheck;
+	BlockNumber blockno;
+	BlockNumber pfblockno;
 } BitmapHeapScanState;
 
 /* ----------------
-- 
2.40.1

