From d141ad447470458de47c6c9b54c97491bb9f168c Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Fri, 22 Mar 2024 09:42:23 -0400
Subject: [PATCH v15 11/13] Push BitmapHeapScan prefetch code into heapam.c

7566751f0ce9 eliminated a table AM layering violation for the current
block in a BitmapHeapScan but left the violation in BitmapHeapScan
prefetching code.

To resolve this, move the BitmapHeapScan prefetching logic entirely into
heap AM code. A fair amount of setup is specific to BitmapHeapScan, and
once the associated state lives in the scan descriptor, it makes more
sense to provide dedicated begin_scan()/rescan()/endscan() routines for
BitmapHeapScan.
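
With this change, the executor no longer tracks prefetch state itself;
it hands the bitmap, any parallel state, and the prefetch maximum to the
table AM when starting or rescanning the scan. As an illustration, a
sketch of the new call shape in BitmapHeapNext() (taken from this patch;
the parallel-initialization details are omitted):

    scan = table_beginscan_bm(node->ss.ss_currentScanDesc,
                              node->ss.ss_currentRelation,
                              node->ss.ps.state->es_snapshot,
                              extra_flags,
                              pf_maximum,
                              node->tbm,
                              node->pstate,
                              node->ss.ps.state->es_query_dsa);
    node->ss.ss_currentScanDesc = scan;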

Author: Melanie Plageman
Discussion: https://postgr.es/m/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
---
 src/backend/access/heap/heapam.c          | 129 ++++++-
 src/backend/access/heap/heapam_handler.c  | 306 ++++++++++++++--
 src/backend/executor/nodeBitmapHeapscan.c | 410 +++-------------------
 src/include/access/heapam.h               |  62 +++-
 src/include/access/relscan.h              |   3 -
 src/include/access/tableam.h              |  74 ++--
 src/include/executor/nodeBitmapHeapscan.h |   6 +
 src/include/nodes/execnodes.h             |  21 --
 src/tools/pgindent/typedefs.list          |   1 +
 9 files changed, 553 insertions(+), 459 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 10f2faaa60..a21ec92d71 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -938,6 +938,48 @@ continue_page:
  * ----------------------------------------------------------------
  */
 
+TableScanDesc
+heap_beginscan_bm(Relation relation, Snapshot snapshot, uint32 flags)
+{
+	BitmapHeapScanDesc scan;
+
+	/*
+	 * increment relation ref count while scanning relation
+	 *
+	 * This is just to make really sure the relcache entry won't go away while
+	 * the scan has a pointer to it.  Caller should be holding the rel open
+	 * anyway, so this is redundant in all normal scenarios...
+	 */
+	RelationIncrementReferenceCount(relation);
+	scan = (BitmapHeapScanDesc) palloc(sizeof(BitmapHeapScanDescData));
+
+	scan->heap_common.rs_base.rs_rd = relation;
+	scan->heap_common.rs_base.rs_snapshot = snapshot;
+	scan->heap_common.rs_base.rs_nkeys = 0;
+	scan->heap_common.rs_base.rs_flags = flags;
+	scan->heap_common.rs_base.rs_parallel = NULL;
+	scan->heap_common.rs_strategy = NULL;
+
+	Assert(snapshot && IsMVCCSnapshot(snapshot));
+
+	/* we only need to set this up once */
+	scan->heap_common.rs_ctup.t_tableOid = RelationGetRelid(relation);
+
+	scan->heap_common.rs_parallelworkerdata = NULL;
+	scan->heap_common.rs_base.rs_key = NULL;
+
+	initscan(&scan->heap_common, NULL, false);
+
+	scan->iterator.serial = NULL;
+	scan->iterator.parallel = NULL;
+	scan->vmbuffer = InvalidBuffer;
+	scan->pf_iterator.serial = NULL;
+	scan->pf_iterator.parallel = NULL;
+	scan->pvmbuffer = InvalidBuffer;
+
+	return (TableScanDesc) scan;
+}
+
 
 TableScanDesc
 heap_beginscan(Relation relation, Snapshot snapshot,
@@ -967,8 +1009,6 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
 	scan->rs_strategy = NULL;	/* set in initscan */
-	scan->rs_vmbuffer = InvalidBuffer;
-	scan->rs_empty_tuples_pending = 0;
 
 	/*
 	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
@@ -1026,6 +1066,35 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	return (TableScanDesc) scan;
 }
 
+/*
+ * Cleanup BitmapHeapScan table state
+ */
+void
+heap_endscan_bm(TableScanDesc sscan)
+{
+	BitmapHeapScanDesc scan = (BitmapHeapScanDesc) sscan;
+
+	if (BufferIsValid(scan->heap_common.rs_cbuf))
+		ReleaseBuffer(scan->heap_common.rs_cbuf);
+
+	if (BufferIsValid(scan->vmbuffer))
+		ReleaseBuffer(scan->vmbuffer);
+
+	if (BufferIsValid(scan->pvmbuffer))
+		ReleaseBuffer(scan->pvmbuffer);
+
+	bhs_end_iterate(&scan->pf_iterator);
+	bhs_end_iterate(&scan->iterator);
+
+	/*
+	 * decrement relation reference count and free scan descriptor storage
+	 */
+	RelationDecrementReferenceCount(scan->heap_common.rs_base.rs_rd);
+
+	pfree(scan);
+}
+
+
 void
 heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 			bool allow_strat, bool allow_sync, bool allow_pagemode)
@@ -1057,12 +1126,6 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
-	if (BufferIsValid(scan->rs_vmbuffer))
-	{
-		ReleaseBuffer(scan->rs_vmbuffer);
-		scan->rs_vmbuffer = InvalidBuffer;
-	}
-
 	/*
 	 * reinitialize scan descriptor
 	 */
@@ -1082,12 +1145,6 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
-	if (BufferIsValid(scan->rs_vmbuffer))
-	{
-		ReleaseBuffer(scan->rs_vmbuffer);
-		scan->rs_vmbuffer = InvalidBuffer;
-	}
-
 	/*
 	 * decrement relation reference count and free scan descriptor storage
 	 */
@@ -1334,6 +1391,50 @@ heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
 	return true;
 }
 
+void
+heap_rescan_bm(TableScanDesc sscan, TIDBitmap *tbm,
+			   ParallelBitmapHeapState *pstate, dsa_area *dsa, int pf_maximum)
+{
+	BitmapHeapScanDesc scan = (BitmapHeapScanDesc) sscan;
+
+	if (BufferIsValid(scan->vmbuffer))
+		ReleaseBuffer(scan->vmbuffer);
+	scan->vmbuffer = InvalidBuffer;
+
+	if (BufferIsValid(scan->pvmbuffer))
+		ReleaseBuffer(scan->pvmbuffer);
+	scan->pvmbuffer = InvalidBuffer;
+
+	bhs_end_iterate(&scan->pf_iterator);
+	bhs_end_iterate(&scan->iterator);
+
+	scan->prefetch_maximum = 0;
+	scan->empty_tuples_pending = 0;
+	scan->pstate = NULL;
+	scan->prefetch_target = -1;
+	scan->prefetch_pages = 0;
+	scan->pfblockno = InvalidBlockNumber;
+
+	bhs_begin_iterate(&scan->iterator,
+					  tbm, dsa,
+					  pstate ?
+					  pstate->tbmiterator :
+					  InvalidDsaPointer);
+#ifdef USE_PREFETCH
+	if (pf_maximum > 0)
+	{
+		bhs_begin_iterate(&scan->pf_iterator, tbm, dsa,
+						  pstate ?
+						  pstate->prefetch_iterator :
+						  InvalidDsaPointer);
+	}
+#endif							/* USE_PREFETCH */
+
+	scan->pstate = pstate;
+	scan->prefetch_maximum = pf_maximum;
+}
+
+
 /*
  *	heap_fetch		- retrieve tuple with given tid
  *
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 5e57d19d91..c513cbd2da 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -60,6 +60,9 @@ static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 								   OffsetNumber tupoffset);
 
 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
+static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanDesc scan);
+static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanDesc scan);
+static inline void BitmapPrefetch(BitmapHeapScanDesc scan);
 
 static const TableAmRoutine heapam_methods;
 
@@ -2186,12 +2189,80 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
  * ------------------------------------------------------------------------
  */
 
+/*
+ *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
+ *
+ *	We keep track of how far the prefetch iterator is ahead of the main
+ *	iterator in prefetch_pages. For each block the main iterator returns, we
+ *	decrement prefetch_pages.
+ */
+static inline void
+BitmapAdjustPrefetchIterator(BitmapHeapScanDesc scan)
+{
+#ifdef USE_PREFETCH
+	BitmapHeapIterator *prefetch_iterator = &scan->pf_iterator;
+	ParallelBitmapHeapState *pstate = scan->pstate;
+	TBMIterateResult *tbmpre;
+
+	if (pstate == NULL)
+	{
+		if (scan->prefetch_pages > 0)
+		{
+			/* The main iterator has closed the distance by one page */
+			scan->prefetch_pages--;
+		}
+		else if (!prefetch_iterator->exhausted)
+		{
+			/* Do not let the prefetch iterator get behind the main one */
+			tbmpre = bhs_iterate(prefetch_iterator);
+			scan->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
+		}
+		return;
+	}
+
+	/*
+	 * Adjusting the prefetch iterator before invoking
+	 * table_scan_bitmap_next_block() keeps prefetch distance higher across
+	 * the parallel workers.
+	 */
+	if (scan->prefetch_maximum > 0)
+	{
+		SpinLockAcquire(&pstate->mutex);
+		if (pstate->prefetch_pages > 0)
+		{
+			pstate->prefetch_pages--;
+			SpinLockRelease(&pstate->mutex);
+		}
+		else
+		{
+			/* Release the mutex before iterating */
+			SpinLockRelease(&pstate->mutex);
+
+			/*
+			 * In shared mode, we cannot ensure that the current blockno of
+			 * the main iterator and that of the prefetch iterator are the
+			 * same.  It's possible that whatever blockno we are prefetching
+			 * will be processed by another process.  Therefore, we don't
+			 * validate the blockno here as we do in the non-parallel
+			 * case.
+			 */
+			if (!prefetch_iterator->exhausted)
+			{
+				tbmpre = bhs_iterate(prefetch_iterator);
+				scan->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
+			}
+		}
+	}
+#endif							/* USE_PREFETCH */
+}
+
 static bool
-heapam_scan_bitmap_next_block(TableScanDesc scan,
-							  BlockNumber *blockno, bool *recheck,
+heapam_scan_bitmap_next_block(TableScanDesc sscan,
+							  bool *recheck,
 							  long *lossy_pages, long *exact_pages)
 {
-	HeapScanDesc hscan = (HeapScanDesc) scan;
+	BitmapHeapScanDesc scan = (BitmapHeapScanDesc) sscan;
+	HeapScanDesc hscan = &scan->heap_common;
 	BlockNumber block;
 	Buffer		buffer;
 	Snapshot	snapshot;
@@ -2201,19 +2272,21 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
 
-	*blockno = InvalidBlockNumber;
 	*recheck = true;
 
+	BitmapAdjustPrefetchIterator(scan);
+
 	do
 	{
 		CHECK_FOR_INTERRUPTS();
 
-		tbmres = bhs_iterate(&scan->rs_bhs_iterator);
+		Assert(!scan->iterator.exhausted);
+		tbmres = bhs_iterate(&scan->iterator);
 
 		if (tbmres == NULL)
 		{
 			/* no more entries in the bitmap */
-			Assert(hscan->rs_empty_tuples_pending == 0);
+			Assert(scan->empty_tuples_pending == 0);
 			return false;
 		}
 
@@ -2228,7 +2301,6 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	} while (!IsolationIsSerializable() && tbmres->blockno >= hscan->rs_nblocks);
 
 	/* Got a valid block */
-	*blockno = tbmres->blockno;
 	*recheck = tbmres->recheck;
 
 	/*
@@ -2236,15 +2308,15 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	 * heap, the bitmap entries don't need rechecking, and all tuples on the
 	 * page are visible to our transaction.
 	 */
-	if (!(scan->rs_flags & SO_NEED_TUPLE) &&
+	if (!(hscan->rs_base.rs_flags & SO_NEED_TUPLE) &&
 		!tbmres->recheck &&
-		VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer))
+		VM_ALL_VISIBLE(hscan->rs_base.rs_rd, tbmres->blockno, &scan->vmbuffer))
 	{
 		/* can't be lossy in the skip_fetch case */
 		Assert(tbmres->ntuples >= 0);
-		Assert(hscan->rs_empty_tuples_pending >= 0);
+		Assert(scan->empty_tuples_pending >= 0);
 
-		hscan->rs_empty_tuples_pending += tbmres->ntuples;
+		scan->empty_tuples_pending += tbmres->ntuples;
 
 		return true;
 	}
@@ -2255,18 +2327,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	 * Acquire pin on the target heap page, trading in any pin we held before.
 	 */
 	hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
-										  scan->rs_rd,
+										  hscan->rs_base.rs_rd,
 										  block);
 	hscan->rs_cblock = block;
 	buffer = hscan->rs_cbuf;
-	snapshot = scan->rs_snapshot;
+	snapshot = hscan->rs_base.rs_snapshot;
 
 	ntup = 0;
 
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_rd, buffer);
+	heap_page_prune_opt(hscan->rs_base.rs_rd, buffer);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
@@ -2294,7 +2366,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 			HeapTupleData heapTuple;
 
 			ItemPointerSet(&tid, block, offnum);
-			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
+			if (heap_hot_search_buffer(&tid, hscan->rs_base.rs_rd, buffer, snapshot,
 									   &heapTuple, NULL, true))
 				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
 		}
@@ -2320,16 +2392,16 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 				continue;
 			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
 			loctup.t_len = ItemIdGetLength(lp);
-			loctup.t_tableOid = scan->rs_rd->rd_id;
+			loctup.t_tableOid = hscan->rs_base.rs_rd->rd_id;
 			ItemPointerSet(&loctup.t_self, block, offnum);
 			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
 			if (valid)
 			{
 				hscan->rs_vistuples[ntup++] = offnum;
-				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
+				PredicateLockTID(hscan->rs_base.rs_rd, &loctup.t_self, snapshot,
 								 HeapTupleHeaderGetXmin(loctup.t_data));
 			}
-			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
+			HeapCheckForSerializableConflictOut(valid, hscan->rs_base.rs_rd, &loctup,
 												buffer, snapshot);
 		}
 	}
@@ -2344,6 +2416,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	else
 		(*exact_pages)++;
 
+	/*
+	 * If serial, we can error out if the prefetch block doesn't stay
+	 * ahead of the current block.
+	 */
+	if (scan->pstate == NULL &&
+		!scan->pf_iterator.exhausted &&
+		scan->pfblockno < block)
+		elog(ERROR, "prefetch and main iterators are out of sync");
+
+	/* Adjust the prefetch target */
+	BitmapAdjustPrefetchTarget(scan);
+
 	/*
 	 * Return true to indicate that a valid block was found and the bitmap is
 	 * not exhausted. If there are no visible tuples on this page,
@@ -2354,22 +2438,168 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	return true;
 }
 
+/*
+ * BitmapAdjustPrefetchTarget - Adjust the prefetch target
+ *
+ * Increase prefetch target if it's not yet at the max.  Note that
+ * we will increase it to zero after fetching the very first
+ * page/tuple, then to one after the second tuple is fetched, then
+ * it doubles as later pages are fetched.
+ */
+static inline void
+BitmapAdjustPrefetchTarget(BitmapHeapScanDesc scan)
+{
+#ifdef USE_PREFETCH
+	ParallelBitmapHeapState *pstate = scan->pstate;
+	int			prefetch_maximum = scan->prefetch_maximum;
+
+	if (pstate == NULL)
+	{
+		if (scan->prefetch_target >= prefetch_maximum)
+			 /* don't increase any further */ ;
+		else if (scan->prefetch_target >= prefetch_maximum / 2)
+			scan->prefetch_target = prefetch_maximum;
+		else if (scan->prefetch_target > 0)
+			scan->prefetch_target *= 2;
+		else
+			scan->prefetch_target++;
+		return;
+	}
+
+	/* Do an unlocked check first to save spinlock acquisitions. */
+	if (pstate->prefetch_target < prefetch_maximum)
+	{
+		SpinLockAcquire(&pstate->mutex);
+		if (pstate->prefetch_target >= prefetch_maximum)
+			 /* don't increase any further */ ;
+		else if (pstate->prefetch_target >= prefetch_maximum / 2)
+			pstate->prefetch_target = prefetch_maximum;
+		else if (pstate->prefetch_target > 0)
+			pstate->prefetch_target *= 2;
+		else
+			pstate->prefetch_target++;
+		SpinLockRelease(&pstate->mutex);
+	}
+#endif							/* USE_PREFETCH */
+}
+
+
+/*
+ * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
+ */
+static inline void
+BitmapPrefetch(BitmapHeapScanDesc scan)
+{
+#ifdef USE_PREFETCH
+	ParallelBitmapHeapState *pstate = scan->pstate;
+	BitmapHeapIterator *prefetch_iterator = &scan->pf_iterator;
+	Relation	rel = scan->heap_common.rs_base.rs_rd;
+
+	if (pstate == NULL)
+	{
+		if (!prefetch_iterator->exhausted)
+		{
+			while (scan->prefetch_pages < scan->prefetch_target)
+			{
+				TBMIterateResult *tbmpre = bhs_iterate(prefetch_iterator);
+				bool		skip_fetch;
+
+				if (tbmpre == NULL)
+				{
+					/* No more pages to prefetch */
+					bhs_end_iterate(prefetch_iterator);
+					break;
+				}
+				scan->prefetch_pages++;
+				scan->pfblockno = tbmpre->blockno;
+
+				/*
+				 * If we expect not to have to actually read this heap page,
+				 * skip this prefetch call, but continue to run the prefetch
+				 * logic normally.  (Would it be better not to increment
+				 * prefetch_pages?)
+				 */
+				skip_fetch = (!(scan->heap_common.rs_base.rs_flags & SO_NEED_TUPLE) &&
+							  !tbmpre->recheck &&
+							  VM_ALL_VISIBLE(rel,
+											 tbmpre->blockno,
+											 &scan->pvmbuffer));
+
+				if (!skip_fetch)
+					PrefetchBuffer(rel, MAIN_FORKNUM, tbmpre->blockno);
+			}
+		}
+
+		return;
+	}
+
+	if (pstate->prefetch_pages < pstate->prefetch_target)
+	{
+		if (!prefetch_iterator->exhausted)
+		{
+			while (1)
+			{
+				TBMIterateResult *tbmpre;
+				bool		do_prefetch = false;
+				bool		skip_fetch;
+
+				/*
+				 * Recheck under the mutex. If some other process has already
+				 * done enough prefetching then we need not do anything.
+				 */
+				SpinLockAcquire(&pstate->mutex);
+				if (pstate->prefetch_pages < pstate->prefetch_target)
+				{
+					pstate->prefetch_pages++;
+					do_prefetch = true;
+				}
+				SpinLockRelease(&pstate->mutex);
+
+				if (!do_prefetch)
+					return;
+
+				tbmpre = bhs_iterate(prefetch_iterator);
+				if (tbmpre == NULL)
+				{
+					/* No more pages to prefetch */
+					bhs_end_iterate(prefetch_iterator);
+					break;
+				}
+
+				scan->pfblockno = tbmpre->blockno;
+
+				/* As above, skip prefetch if we expect not to need page */
+				skip_fetch = (!(scan->heap_common.rs_base.rs_flags & SO_NEED_TUPLE) &&
+							  !tbmpre->recheck &&
+							  VM_ALL_VISIBLE(rel,
+											 tbmpre->blockno,
+											 &scan->pvmbuffer));
+
+				if (!skip_fetch)
+					PrefetchBuffer(rel, MAIN_FORKNUM, tbmpre->blockno);
+			}
+		}
+	}
+#endif							/* USE_PREFETCH */
+}
+
 static bool
 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 							  TupleTableSlot *slot)
 {
-	HeapScanDesc hscan = (HeapScanDesc) scan;
+	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
+	HeapScanDesc hscan = &bscan->heap_common;
 	OffsetNumber targoffset;
 	Page		page;
 	ItemId		lp;
 
-	if (hscan->rs_empty_tuples_pending > 0)
+	if (bscan->empty_tuples_pending > 0)
 	{
 		/*
 		 * If we don't have to fetch the tuple, just return nulls.
 		 */
 		ExecStoreAllNullTuple(slot);
-		hscan->rs_empty_tuples_pending--;
+		bscan->empty_tuples_pending--;
 		return true;
 	}
 
@@ -2379,6 +2609,36 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 	if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
 		return false;
 
+#ifdef USE_PREFETCH
+
+	/*
+	 * Try to prefetch at least a few pages even before we get to the second
+	 * page if we don't stop reading after the first tuple.
+	 */
+	if (!bscan->pstate)
+	{
+		if (bscan->prefetch_target < bscan->prefetch_maximum)
+			bscan->prefetch_target++;
+	}
+	else if (bscan->pstate->prefetch_target < bscan->prefetch_maximum)
+	{
+		/* take spinlock while updating shared state */
+		SpinLockAcquire(&bscan->pstate->mutex);
+		if (bscan->pstate->prefetch_target < bscan->prefetch_maximum)
+			bscan->pstate->prefetch_target++;
+		SpinLockRelease(&bscan->pstate->mutex);
+	}
+
+	/*
+	 * We issue prefetch requests *after* fetching the current page to try to
+	 * avoid having prefetching interfere with the main I/O. Also, this should
+	 * happen only when we have determined there is still something to do on
+	 * the current page, else we may uselessly prefetch the same page we are
+	 * just about to request for real.
+	 */
+	BitmapPrefetch(bscan);
+#endif							/* USE_PREFETCH */
+
 	targoffset = hscan->rs_vistuples[hscan->rs_cindex];
 	page = BufferGetPage(hscan->rs_cbuf);
 	lp = PageGetItemId(page, targoffset);
@@ -2704,6 +2964,10 @@ static const TableAmRoutine heapam_methods = {
 	.scan_set_tidrange = heap_set_tidrange,
 	.scan_getnextslot_tidrange = heap_getnextslot_tidrange,
 
+	.scan_rescan_bm = heap_rescan_bm,
+	.scan_begin_bm = heap_beginscan_bm,
+	.scan_end_bm = heap_endscan_bm,
+
 	.parallelscan_estimate = table_block_parallelscan_estimate,
 	.parallelscan_initialize = table_block_parallelscan_initialize,
 	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 70b560b8ee..d22a433f06 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,10 +51,6 @@
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
-static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
-static inline void BitmapPrefetch(BitmapHeapScanState *node,
-								  TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
 
 /*
@@ -122,20 +118,9 @@ bhs_end_iterate(BitmapHeapIterator *iterator)
 static TupleTableSlot *
 BitmapHeapNext(BitmapHeapScanState *node)
 {
-	ExprContext *econtext;
-	TableScanDesc scan;
-	TIDBitmap  *tbm;
-	TupleTableSlot *slot;
-	ParallelBitmapHeapState *pstate = node->pstate;
-	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
-
-	/*
-	 * extract necessary information from index scan node
-	 */
-	econtext = node->ss.ps.ps_ExprContext;
-	slot = node->ss.ss_ScanTupleSlot;
-	scan = node->ss.ss_currentScanDesc;
-	tbm = node->tbm;
+	ExprContext *econtext = node->ss.ps.ps_ExprContext;
+	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
+	TableScanDesc scan = node->ss.ss_currentScanDesc;
 
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
@@ -146,25 +131,37 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
 	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
 	 * desired prefetch distance, which starts small and increases up to the
-	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
+	 * scan->prefetch_maximum.  This is to avoid doing a lot of prefetching in
 	 * a scan that stops after a few tuples because of a LIMIT.
 	 */
 	if (!node->initialized)
 	{
+		Relation	rel = node->ss.ss_currentRelation;
+		int			pf_maximum = 0;
+		bool		init_shared_state = false;
+		uint32		extra_flags = 0;
+
 		/*
 		 * The leader will immediately come out of the function, but others
 		 * will be blocked until leader populates the TBM and wakes them up.
 		 */
-		bool		init_shared_state = node->pstate ?
+		init_shared_state = node->pstate ?
 			BitmapShouldInitializeSharedState(node->pstate) : false;
 
-		if (!pstate || init_shared_state)
+		/*
+		 * Maximum number of prefetches for the tablespace if configured,
+		 * otherwise the current value of the effective_io_concurrency GUC.
+		 */
+#ifdef USE_PREFETCH
+		pf_maximum = get_tablespace_io_concurrency(rel->rd_rel->reltablespace);
+#endif
+
+		if (!node->pstate || init_shared_state)
 		{
-			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
+			node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
 
-			if (!tbm || !IsA(tbm, TIDBitmap))
+			if (!node->tbm || !IsA(node->tbm, TIDBitmap))
 				elog(ERROR, "unrecognized result from subplan");
-			node->tbm = tbm;
 
 			if (init_shared_state)
 			{
@@ -173,113 +170,52 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				 * dsa_pointer of the iterator state which will be used by
 				 * multiple processes to iterate jointly.
 				 */
-				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
+				node->pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
+
 #ifdef USE_PREFETCH
-				if (node->prefetch_maximum > 0)
+				if (pf_maximum > 0)
 				{
-					pstate->prefetch_iterator =
-						tbm_prepare_shared_iterate(tbm);
-
-					/*
-					 * We don't need the mutex here as we haven't yet woke up
-					 * others.
-					 */
-					pstate->prefetch_pages = 0;
-					pstate->prefetch_target = -1;
+					node->pstate->prefetch_iterator =
+						tbm_prepare_shared_iterate(node->tbm);
 				}
 #endif
 				/* We have initialized the shared state so wake up others. */
-				BitmapDoneInitializingSharedState(pstate);
+				BitmapDoneInitializingSharedState(node->pstate);
 			}
 		}
 
 		/*
-		 * If this is the first scan of the underlying table, create the table
-		 * scan descriptor and begin the scan.
+		 * We can potentially skip fetching heap pages if we do not need any
+		 * columns of the table, either for checking non-indexable quals or
+		 * for returning data.  This test is a bit simplistic, as it checks
+		 * the stronger condition that there's no qual or return tlist at all.
+		 * But in most cases it's probably not worth working harder than that.
 		 */
-		if (!scan)
-		{
-			uint32		extra_flags = 0;
-
-			/*
-			 * We can potentially skip fetching heap pages if we do not need
-			 * any columns of the table, either for checking non-indexable
-			 * quals or for returning data.  This test is a bit simplistic, as
-			 * it checks the stronger condition that there's no qual or return
-			 * tlist at all. But in most cases it's probably not worth working
-			 * harder than that.
-			 */
-			if (node->ss.ps.plan->qual != NIL || node->ss.ps.plan->targetlist != NIL)
-				extra_flags |= SO_NEED_TUPLE;
-
-			scan = table_beginscan_bm(node->ss.ss_currentRelation,
-									  node->ss.ps.state->es_snapshot,
-									  0,
-									  NULL,
-									  extra_flags);
-
-			node->ss.ss_currentScanDesc = scan;
-		}
-
-		bhs_begin_iterate(&scan->rs_bhs_iterator, tbm, dsa,
-						  pstate ?
-						  pstate->tbmiterator :
-						  InvalidDsaPointer);
-
-#ifdef USE_PREFETCH
-		if (node->prefetch_maximum > 0)
-		{
-			bhs_begin_iterate(&node->pf_iterator, tbm, dsa,
-							  pstate ?
-							  pstate->prefetch_iterator :
-							  InvalidDsaPointer);
-			/* Only used for serial BHS */
-			node->prefetch_pages = 0;
-			node->prefetch_target = -1;
-		}
-#endif							/* USE_PREFETCH */
-
+		if (node->ss.ps.plan->qual != NIL || node->ss.ps.plan->targetlist != NIL)
+			extra_flags |= SO_NEED_TUPLE;
+
+		scan = table_beginscan_bm(node->ss.ss_currentScanDesc,
+								  rel,
+								  node->ss.ps.state->es_snapshot,
+								  extra_flags,
+								  pf_maximum,
+								  node->tbm,
+								  node->pstate,
+								  node->ss.ps.state->es_query_dsa);
+
+		node->ss.ss_currentScanDesc = scan;
 		node->initialized = true;
 
 		goto new_page;
 	}
 
+
 	for (;;)
 	{
 		while (table_scan_bitmap_next_tuple(scan, slot))
 		{
 			CHECK_FOR_INTERRUPTS();
 
-#ifdef USE_PREFETCH
-
-			/*
-			 * Try to prefetch at least a few pages even before we get to the
-			 * second page if we don't stop reading after the first tuple.
-			 */
-			if (!pstate)
-			{
-				if (node->prefetch_target < node->prefetch_maximum)
-					node->prefetch_target++;
-			}
-			else if (pstate->prefetch_target < node->prefetch_maximum)
-			{
-				/* take spinlock while updating shared state */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_target < node->prefetch_maximum)
-					pstate->prefetch_target++;
-				SpinLockRelease(&pstate->mutex);
-			}
-#endif							/* USE_PREFETCH */
-
-			/*
-			 * We issue prefetch requests *after* fetching the current page to
-			 * try to avoid having prefetching interfere with the main I/O.
-			 * Also, this should happen only when we have determined there is
-			 * still something to do on the current page, else we may
-			 * uselessly prefetch the same page we are just about to request
-			 * for real.
-			 */
-			BitmapPrefetch(node, scan);
 
 			/*
 			 * If we are using lossy info, we have to recheck the qual
@@ -303,23 +239,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 new_page:
 
-		BitmapAdjustPrefetchIterator(node);
-
-		if (!table_scan_bitmap_next_block(scan, &node->blockno, &node->recheck,
+		if (!table_scan_bitmap_next_block(scan, &node->recheck,
 										  &node->lossy_pages, &node->exact_pages))
 			break;
-
-		/*
-		 * If serial, we can error out if the the prefetch block doesn't stay
-		 * ahead of the current block.
-		 */
-		if (node->pstate == NULL &&
-			!node->pf_iterator.exhausted &&
-			node->pfblockno < node->blockno)
-			elog(ERROR, "prefetch and main iterators are out of sync");
-
-		/* Adjust the prefetch target */
-		BitmapAdjustPrefetchTarget(node);
 	}
 
 	/*
@@ -343,215 +265,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 	ConditionVariableBroadcast(&pstate->cv);
 }
 
-/*
- *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
- *
- *	We keep track of how far the prefetch iterator is ahead of the main
- *	iterator in prefetch_pages. For each block the main iterator returns, we
- *	decrement prefetch_pages.
- */
-static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-	BitmapHeapIterator *prefetch_iterator = &node->pf_iterator;
-	TBMIterateResult *tbmpre;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_pages > 0)
-		{
-			/* The main iterator has closed the distance by one page */
-			node->prefetch_pages--;
-		}
-		else if (!prefetch_iterator->exhausted)
-		{
-			/* Do not let the prefetch iterator get behind the main one */
-			tbmpre = bhs_iterate(prefetch_iterator);
-			node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
-		}
-		return;
-	}
-
-	/*
-	 * Adjusting the prefetch iterator before invoking
-	 * table_scan_bitmap_next_block() keeps prefetch distance higher across
-	 * the parallel workers.
-	 */
-	if (node->prefetch_maximum > 0)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_pages > 0)
-		{
-			pstate->prefetch_pages--;
-			SpinLockRelease(&pstate->mutex);
-		}
-		else
-		{
-			/* Release the mutex before iterating */
-			SpinLockRelease(&pstate->mutex);
-
-			/*
-			 * In case of shared mode, we can not ensure that the current
-			 * blockno of the main iterator and that of the prefetch iterator
-			 * are same.  It's possible that whatever blockno we are
-			 * prefetching will be processed by another process.  Therefore,
-			 * we don't validate the blockno here as we do in non-parallel
-			 * case.
-			 */
-			if (!prefetch_iterator->exhausted)
-			{
-				tbmpre = bhs_iterate(prefetch_iterator);
-				node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapAdjustPrefetchTarget - Adjust the prefetch target
- *
- * Increase prefetch target if it's not yet at the max.  Note that
- * we will increase it to zero after fetching the very first
- * page/tuple, then to one after the second tuple is fetched, then
- * it doubles as later pages are fetched.
- */
-static inline void
-BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (node->prefetch_target >= node->prefetch_maximum / 2)
-			node->prefetch_target = node->prefetch_maximum;
-		else if (node->prefetch_target > 0)
-			node->prefetch_target *= 2;
-		else
-			node->prefetch_target++;
-		return;
-	}
-
-	/* Do an unlocked check first to save spinlock acquisitions. */
-	if (pstate->prefetch_target < node->prefetch_maximum)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
-			pstate->prefetch_target = node->prefetch_maximum;
-		else if (pstate->prefetch_target > 0)
-			pstate->prefetch_target *= 2;
-		else
-			pstate->prefetch_target++;
-		SpinLockRelease(&pstate->mutex);
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
- */
-static inline void
-BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-	BitmapHeapIterator *prefetch_iterator = &node->pf_iterator;
-
-	if (pstate == NULL)
-	{
-		if (!prefetch_iterator->exhausted)
-		{
-			while (node->prefetch_pages < node->prefetch_target)
-			{
-				TBMIterateResult *tbmpre = bhs_iterate(prefetch_iterator);
-				bool		skip_fetch;
-
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					bhs_end_iterate(prefetch_iterator);
-					break;
-				}
-				node->prefetch_pages++;
-				node->pfblockno = tbmpre->blockno;
-
-				/*
-				 * If we expect not to have to actually read this heap page,
-				 * skip this prefetch call, but continue to run the prefetch
-				 * logic normally.  (Would it be better not to increment
-				 * prefetch_pages?)
-				 */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLE) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-
-		return;
-	}
-
-	if (pstate->prefetch_pages < pstate->prefetch_target)
-	{
-		if (!prefetch_iterator->exhausted)
-		{
-			while (1)
-			{
-				TBMIterateResult *tbmpre;
-				bool		do_prefetch = false;
-				bool		skip_fetch;
-
-				/*
-				 * Recheck under the mutex. If some other process has already
-				 * done enough prefetching then we need not to do anything.
-				 */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_pages < pstate->prefetch_target)
-				{
-					pstate->prefetch_pages++;
-					do_prefetch = true;
-				}
-				SpinLockRelease(&pstate->mutex);
-
-				if (!do_prefetch)
-					return;
-
-				tbmpre = bhs_iterate(prefetch_iterator);
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					bhs_end_iterate(prefetch_iterator);
-					break;
-				}
-
-				node->pfblockno = tbmpre->blockno;
-
-				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLE) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
 /*
  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
  */
@@ -597,19 +310,12 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	if (node->ss.ss_currentScanDesc)
 		table_rescan(node->ss.ss_currentScanDesc, NULL);
 
-	/* release bitmaps and buffers if any */
-	if (node->pf_iterator.exhausted)
-		bhs_end_iterate(&node->pf_iterator);
+	/* release bitmaps if any */
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
 	node->initialized = false;
-	node->pvmbuffer = InvalidBuffer;
 	node->recheck = true;
-	node->blockno = InvalidBlockNumber;
-	node->pfblockno = InvalidBlockNumber;
 
 	ExecScanReScan(&node->ss);
 
@@ -648,14 +354,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		table_endscan(scanDesc);
 
 	/*
-	 * release bitmaps and buffers if any
+	 * release bitmaps if any
 	 */
-	if (!node->pf_iterator.exhausted)
-		bhs_end_iterate(&node->pf_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 }
 
 /* ----------------------------------------------------------------
@@ -688,16 +390,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
 	scanstate->tbm = NULL;
-	scanstate->pvmbuffer = InvalidBuffer;
 	scanstate->exact_pages = 0;
 	scanstate->lossy_pages = 0;
-	scanstate->prefetch_pages = 0;
-	scanstate->prefetch_target = 0;
 	scanstate->initialized = false;
 	scanstate->pstate = NULL;
 	scanstate->recheck = true;
-	scanstate->blockno = InvalidBlockNumber;
-	scanstate->pfblockno = InvalidBlockNumber;
 
 	/*
 	 * Miscellaneous initialization
@@ -737,13 +434,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->bitmapqualorig =
 		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
 
-	/*
-	 * Maximum number of prefetches for the tablespace if configured,
-	 * otherwise the current value of the effective_io_concurrency GUC.
-	 */
-	scanstate->prefetch_maximum =
-		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
-
 	scanstate->ss.ss_currentRelation = currentRelation;
 
 	/*
@@ -827,7 +517,7 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 	/* Initialize the mutex */
 	SpinLockInit(&pstate->mutex);
 	pstate->prefetch_pages = 0;
-	pstate->prefetch_target = 0;
+	pstate->prefetch_target = -1;
 	pstate->state = BM_INITIAL;
 
 	ConditionVariableInit(&pstate->cv);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 750ea30852..73690d15c5 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -76,22 +76,60 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+	/* these fields only used in page-at-a-time mode and for bitmap scans */
+	int			rs_cindex;		/* current tuple's index in vistuples */
+	int			rs_ntuples;		/* number of visible tuples on page */
+	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
+
+}			HeapScanDescData;
+typedef struct HeapScanDescData *HeapScanDesc;
+
+typedef struct BitmapHeapScanDescData
+{
+	/* All the non-BitmapHeapScan specific members */
+	HeapScanDescData heap_common;
+
+	/*
+	 * Members common to Parallel and Serial BitmapHeapScan
+	 */
+	struct BitmapHeapIterator iterator;
+	struct BitmapHeapIterator pf_iterator;
+
+	/* maximum value for prefetch_target */
+	int			prefetch_maximum;
+
 	/*
 	 * These fields are only used for bitmap scans for the "skip fetch"
 	 * optimization. Bitmap scans needing no fields from the heap may skip
 	 * fetching an all visible block, instead using the number of tuples per
 	 * block reported by the bitmap to determine how many NULL-filled tuples
-	 * to return.
+	 * to return. They are common to parallel and serial BitmapHeapScans.
 	 */
-	Buffer		rs_vmbuffer;
-	int			rs_empty_tuples_pending;
+	Buffer		vmbuffer;
+	/* buffer for visibility-map lookups of prefetched pages */
+	Buffer		pvmbuffer;
+	int			empty_tuples_pending;
+
+	/*
+	 * Parallel-only members
+	 */
+
+	struct ParallelBitmapHeapState *pstate;
+
+	/*
+	 * Serial-only members
+	 */
+
+	/* Current target for prefetch distance */
+	int			prefetch_target;
+	/* # pages prefetch iterator is ahead of current */
+	int			prefetch_pages;
+	/* used to validate prefetch block stays ahead of current block */
+	BlockNumber pfblockno;
+}			BitmapHeapScanDescData;
+
+typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc;
 
-	/* these fields only used in page-at-a-time mode and for bitmap scans */
-	int			rs_cindex;		/* current tuple's index in vistuples */
-	int			rs_ntuples;		/* number of visible tuples on page */
-	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
-}			HeapScanDescData;
-typedef struct HeapScanDescData *HeapScanDesc;
 
 /*
  * Descriptor for fetches from heap via an index.
@@ -289,6 +327,12 @@ extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
 extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
 									  ScanDirection direction,
 									  TupleTableSlot *slot);
+extern TableScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, uint32 flags);
+
+extern void heap_endscan_bm(TableScanDesc scan);
+extern void heap_rescan_bm(TableScanDesc sscan, TIDBitmap *tbm,
+						   ParallelBitmapHeapState *pstate, dsa_area *dsa, int pf_maximum);
+
 extern bool heap_fetch(Relation relation, Snapshot snapshot,
 					   HeapTuple tuple, Buffer *userbuf, bool keep_buf);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index e520186b41..855b9558bf 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -41,9 +41,6 @@ typedef struct TableScanDescData
 	ItemPointerData rs_mintid;
 	ItemPointerData rs_maxtid;
 
-	/* Only used for Bitmap table scans */
-	BitmapHeapIterator rs_bhs_iterator;
-
 	/*
 	 * Information about type and behaviour of the scan, a bitmask of members
 	 * of the ScanOptions enum (see tableam.h).
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 618ab2b449..138bf5f6ed 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -391,6 +391,23 @@ typedef struct TableAmRoutine
 											  ScanDirection direction,
 											  TupleTableSlot *slot);
 
+	/*
+	 * Begin a bitmap table scan of `rel` and return its scan descriptor.
+	 */
+	TableScanDesc (*scan_begin_bm) (Relation rel,
+									Snapshot snapshot,
+									uint32 flags);
+
+	void		(*scan_rescan_bm) (TableScanDesc scan, TIDBitmap *tbm,
+								   ParallelBitmapHeapState *pstate, dsa_area *dsa,
+								   int pf_maximum);
+
+	/*
+	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
+	 * TableScanDesc.rs_snapshot needs to be unregistered.
+	 */
+	void		(*scan_end_bm) (TableScanDesc scan);
+
 	/* ------------------------------------------------------------------------
 	 * Parallel table scan related functions.
 	 * ------------------------------------------------------------------------
@@ -774,9 +791,9 @@ typedef struct TableAmRoutine
 	 */
 
 	/*
-	 * Prepare to fetch / check / return tuples from `blockno` as part of a
-	 * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
-	 * false if the bitmap is exhausted and true otherwise.
+	 * Prepare to fetch / check / return tuples as part of a bitmap table
+	 * scan. `scan` was started via table_beginscan_bm(). Return false if the
+	 * bitmap is exhausted and true otherwise.
 	 *
 	 * This will typically read and pin the target block, and do the necessary
 	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
@@ -785,22 +802,11 @@ typedef struct TableAmRoutine
 	 * lossy_pages is incremented if the bitmap is lossy for the selected
 	 * block; otherwise, exact_pages is incremented.
 	 *
-	 * XXX: Currently this may only be implemented if the AM uses md.c as its
-	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
-	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
-	 * performs prefetching directly using that interface.  This probably
-	 * needs to be rectified at a later point.
-	 *
-	 * XXX: Currently this may only be implemented if the AM uses the
-	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
-	 * perform prefetching.  This probably needs to be rectified at a later
-	 * point.
-	 *
 	 * Optional callback, but either both scan_bitmap_next_block and
 	 * scan_bitmap_next_tuple need to exist, or neither.
 	 */
 	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
-										   BlockNumber *blockno, bool *recheck,
+										   bool *recheck,
 										   long *lossy_pages, long *exact_pages);
 
 	/*
@@ -929,18 +935,26 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
 }
 
 /*
- * table_beginscan_bm is an alternative entry point for setting up a
- * TableScanDesc for a bitmap heap scan.  Although that scan technology is
- * really quite unlike a standard seqscan, there is just enough commonality to
- * make it worth using the same data structure.
+ * table_beginscan_bm is the entry point for setting up a TableScanDesc for a
+ * bitmap heap scan.
  */
 static inline TableScanDesc
-table_beginscan_bm(Relation rel, Snapshot snapshot,
-				   int nkeys, struct ScanKeyData *key, uint32 extra_flags)
+table_beginscan_bm(TableScanDesc scan, Relation rel, Snapshot snapshot,
+				   uint32 extra_flags, int pf_maximum, TIDBitmap *tbm,
+				   ParallelBitmapHeapState *pstate, dsa_area *dsa)
 {
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE | extra_flags;
 
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+	/*
+	 * If this is the first scan of the underlying table, create the table
+	 * scan descriptor and begin the scan.
+	 */
+	if (!scan)
+		scan = rel->rd_tableam->scan_begin_bm(rel, snapshot, flags);
+
+	scan->rs_rd->rd_tableam->scan_rescan_bm(scan, tbm, pstate, dsa, pf_maximum);
+
+	return scan;
 }
 
 /*
@@ -987,9 +1001,11 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
 static inline void
 table_endscan(TableScanDesc scan)
 {
-	if (scan->rs_flags & SO_TYPE_BITMAPSCAN &&
-		!scan->rs_bhs_iterator.exhausted)
-		bhs_end_iterate(&scan->rs_bhs_iterator);
+	if (scan->rs_flags & SO_TYPE_BITMAPSCAN)
+	{
+		scan->rs_rd->rd_tableam->scan_end_bm(scan);
+		return;
+	}
 
 	scan->rs_rd->rd_tableam->scan_end(scan);
 }
@@ -1001,10 +1017,6 @@ static inline void
 table_rescan(TableScanDesc scan,
 			 struct ScanKeyData *key)
 {
-	if (scan->rs_flags & SO_TYPE_BITMAPSCAN &&
-		!scan->rs_bhs_iterator.exhausted)
-		bhs_end_iterate(&scan->rs_bhs_iterator);
-
 	scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
 }
 
@@ -1979,7 +1991,7 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
  */
 static inline bool
 table_scan_bitmap_next_block(TableScanDesc scan,
-							 BlockNumber *blockno, bool *recheck,
+							 bool *recheck,
 							 long *lossy_pages,
 							 long *exact_pages)
 {
@@ -1992,7 +2004,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
 		elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
-														   blockno, recheck,
+														   recheck,
 														   lossy_pages, exact_pages);
 }
 
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index 7064f54686..f3c92f7a8a 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,6 +28,12 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 										  ParallelContext *pcxt);
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 										   ParallelWorkerContext *pwcxt);
+typedef struct BitmapHeapIterator
+{
+	struct TBMIterator *serial;
+	struct TBMSharedIterator *parallel;
+	bool		exhausted;
+} BitmapHeapIterator;
 
 extern TBMIterateResult *bhs_iterate(BitmapHeapIterator *iterator);
 extern void bhs_begin_iterate(BitmapHeapIterator *iterator, TIDBitmap *tbm,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 714eb3e534..49ce4ff9a7 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1787,30 +1787,16 @@ typedef struct ParallelBitmapHeapState
 	ConditionVariable cv;
 } ParallelBitmapHeapState;
 
-typedef struct BitmapHeapIterator
-{
-	struct TBMIterator *serial;
-	struct TBMSharedIterator *parallel;
-	bool		exhausted;
-} BitmapHeapIterator;
-
 /* ----------------
  *	 BitmapHeapScanState information
  *
  *		bitmapqualorig	   execution state for bitmapqualorig expressions
  *		tbm				   bitmap obtained from child index scan(s)
- *		pvmbuffer		   buffer for visibility-map lookups of prefetched pages
  *		exact_pages		   total number of exact pages retrieved
  *		lossy_pages		   total number of lossy pages retrieved
- *		pf_iterator        for prefetching ahead of current page
- *		prefetch_pages	   # pages prefetch iterator is ahead of current
- *		prefetch_target    current target prefetch distance
- *		prefetch_maximum   maximum value for prefetch_target
  *		initialized		   is node is ready to iterate
  *		pstate			   shared state for parallel bitmap scan
  *		recheck			   do current page's tuples need recheck
- *		blockno			   used to validate pf and current block in sync
- *		pfblockno		   used to validate pf stays ahead of current block
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1818,18 +1804,11 @@ typedef struct BitmapHeapScanState
 	ScanState	ss;				/* its first field is NodeTag */
 	ExprState  *bitmapqualorig;
 	TIDBitmap  *tbm;
-	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
-	int			prefetch_pages;
-	int			prefetch_target;
-	int			prefetch_maximum;
 	bool		initialized;
-	BitmapHeapIterator pf_iterator;
 	ParallelBitmapHeapState *pstate;
 	bool		recheck;
-	BlockNumber blockno;
-	BlockNumber pfblockno;
 } BitmapHeapScanState;
 
 /* ----------------
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 2348a8793e..443b01c053 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -263,6 +263,7 @@ BitmapHeapIterator
 BitmapHeapPath
 BitmapHeapScan
+BitmapHeapScanDesc
 BitmapHeapScanState
 BitmapIndexScan
 BitmapIndexScanState
 BitmapOr
-- 
2.40.1

