From c2c532ae8073187549261438e15f80ee37e9076f Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 22 Aug 2023 16:48:30 +1200
Subject: [PATCH v5 10/10] WIP: Use streaming reads in bitmap heapscan.

XXX Cherry-picked from
https://github.com/melanieplageman/postgres/tree/bhs_aio and lightly
modified by TM, for demonstration purposes.

Author: Melanie Plageman <melanieplageman@gmail.com>
---
 src/backend/access/gin/ginget.c           |  17 +-
 src/backend/access/gin/ginscan.c          |   7 +
 src/backend/access/heap/heapam.c          |  71 +++
 src/backend/access/heap/heapam_handler.c  |  78 ++--
 src/backend/commands/explain.c            |  21 +-
 src/backend/executor/nodeBitmapHeapscan.c | 521 ++--------------------
 src/backend/nodes/tidbitmap.c             |  76 ++--
 src/include/access/heapam.h               |   7 +
 src/include/access/relscan.h              |   8 +
 src/include/access/tableam.h              |  71 ++-
 src/include/executor/nodeBitmapHeapscan.h |   1 +
 src/include/nodes/execnodes.h             |  39 +-
 src/include/nodes/tidbitmap.h             |   4 +-
 13 files changed, 284 insertions(+), 637 deletions(-)

diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 0b4f2ebadb..f238a3b071 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -373,7 +373,10 @@ restartScanEntry:
 			if (entry->matchBitmap)
 			{
 				if (entry->matchIterator)
+				{
 					tbm_end_iterate(entry->matchIterator);
+					pfree(entry->matchResult);
+				}
 				entry->matchIterator = NULL;
 				tbm_free(entry->matchBitmap);
 				entry->matchBitmap = NULL;
@@ -386,6 +389,9 @@ restartScanEntry:
 		if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
 		{
 			entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
+			entry->matchResult = (TBMIterateResult *)
+				palloc0(sizeof(TBMIterateResult) + MaxHeapTuplesPerPage *
+						sizeof(OffsetNumber));
 			entry->isFinished = false;
 		}
 	}
@@ -823,21 +829,24 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 		{
 			/*
 			 * If we've exhausted all items on this block, move to next block
-			 * in the bitmap.
+			 * in the bitmap. tbm_iterate() sets matchResult->blockno to
+			 * InvalidBlockNumber when the bitmap is exhausted.
 			 */
-			while (entry->matchResult == NULL ||
+			while ((!BlockNumberIsValid(entry->matchResult->blockno)) ||
 				   (entry->matchResult->ntuples >= 0 &&
 					entry->offset >= entry->matchResult->ntuples) ||
 				   entry->matchResult->blockno < advancePastBlk ||
 				   (ItemPointerIsLossyPage(&advancePast) &&
 					entry->matchResult->blockno == advancePastBlk))
 			{
-				entry->matchResult = tbm_iterate(entry->matchIterator);
 
-				if (entry->matchResult == NULL)
+				tbm_iterate(entry->matchIterator, entry->matchResult);
+				if (!BlockNumberIsValid(entry->matchResult->blockno))
 				{
 					ItemPointerSetInvalid(&entry->curItem);
 					tbm_end_iterate(entry->matchIterator);
+					pfree(entry->matchResult);
+					entry->matchResult = NULL;
 					entry->matchIterator = NULL;
 					entry->isFinished = true;
 					break;
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index af24d38544..be27f9fe07 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -246,7 +246,14 @@ ginFreeScanKeys(GinScanOpaque so)
 		if (entry->list)
 			pfree(entry->list);
 		if (entry->matchIterator)
+		{
 			tbm_end_iterate(entry->matchIterator);
+			if (entry->matchResult)
+			{
+				pfree(entry->matchResult);
+				entry->matchResult = NULL;
+			}
+		}
 		if (entry->matchBitmap)
 			tbm_free(entry->matchBitmap);
 	}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 76a53f68b6..b174b5c675 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -226,6 +226,53 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
  * ----------------------------------------------------------------
  */
 
+static BlockNumber
+bitmapheap_pgsr_next_single(PgStreamingRead *pgsr, void *pgsr_private,
+							void *per_buffer_data)
+{
+	TBMIterateResult *tbmres;
+	HeapScanDesc hdesc = (HeapScanDesc) pgsr_private;
+
+	tbmres = (TBMIterateResult *) per_buffer_data;
+
+	for (;;)
+	{
+		if (hdesc->rs_base.shared_tbmiterator)
+			tbm_shared_iterate(hdesc->rs_base.shared_tbmiterator, tbmres);
+		else
+			tbm_iterate(hdesc->rs_base.tbmiterator, tbmres);
+
+		if (!BlockNumberIsValid(tbmres->blockno))
+			return InvalidBlockNumber;
+
+		/*
+		 * Ignore any claimed entries past what we think is the end of the
+		 * relation. It may have been extended after the start of our scan (we
+		 * only hold an AccessShareLock, and it could be inserts from this
+		 * backend).  We don't take this optimization in SERIALIZABLE
+		 * isolation though, as we need to examine all invisible tuples
+		 * reachable by the index.
+		 */
+		if (!IsolationIsSerializable() && tbmres->blockno >= hdesc->rs_nblocks)
+			continue;
+
+		if (tbmres->ntuples >= 0)
+			hdesc->rs_base.exact_pages++;
+		else
+			hdesc->rs_base.lossy_pages++;
+
+		if (hdesc->rs_base.rs_flags & SO_CAN_SKIP_FETCH &&
+			!tbmres->recheck &&
+			VM_ALL_VISIBLE(hdesc->rs_base.rs_rd, tbmres->blockno, &hdesc->vmbuffer))
+		{
+			hdesc->empty_tuples += tbmres->ntuples;
+			continue;
+		}
+
+		return tbmres->blockno;
+	}
+}
+
 static BlockNumber
 heap_pgsr_next_single(PgStreamingRead *pgsr, void *pgsr_private,
 					  void *per_buffer_data)
@@ -433,6 +480,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		pg_streaming_read_free(scan->pgsr);
 		scan->pgsr = NULL;
 	}
+	if (BufferIsValid(scan->vmbuffer))
+	{
+		ReleaseBuffer(scan->vmbuffer);
+		scan->vmbuffer = InvalidBuffer;
+	}
 
 	/*
 	 * FIXME: This probably should be done in the !rs_inited blocks instead.
@@ -446,6 +498,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		else
 			scan->pgsr = heap_pgsr_single_alloc(scan);
 	}
+	else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
+	{
+		scan->pgsr = pg_streaming_read_buffer_alloc(PGSR_FLAG_DEFAULT,
+													scan,
+													offsetof(TBMIterateResult, offsets) + MaxHeapTuplesPerPage * sizeof(OffsetNumber),
+													scan->rs_strategy,
+													BMR_REL(scan->rs_base.rs_rd),
+													MAIN_FORKNUM,
+													bitmapheap_pgsr_next_single);
+
+
+		scan->rs_inited = true;
+	}
 }
 
 /*
@@ -1118,6 +1183,12 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_strategy = NULL;	/* set in initscan */
 
 	scan->pgsr = NULL;
+	scan->vmbuffer = InvalidBuffer;
+	scan->empty_tuples = 0;
+	scan->rs_base.exact_pages = 0;
+	scan->rs_base.lossy_pages = 0;
+	scan->rs_base.tbmiterator = NULL;
+	scan->rs_base.shared_tbmiterator = NULL;
 
 	/*
 	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 6127f9d75a..d5e267f45f 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -44,6 +44,7 @@
 #include "storage/smgr.h"
 #include "utils/builtins.h"
 #include "utils/rel.h"
+#include "storage/streaming_read.h"
 
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
@@ -2110,38 +2111,55 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
  * Executor related callbacks for the heap AM
  * ------------------------------------------------------------------------
  */
-
 static bool
-heapam_scan_bitmap_next_block(TableScanDesc scan,
-							  TBMIterateResult *tbmres)
+heapam_scan_bitmap_next_block(TableScanDesc scan, bool *recheck)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
-	BlockNumber block = tbmres->blockno;
+	TBMIterateResult *tbmres;
+	void	   *io_private;
 	Buffer		buffer;
 	Snapshot	snapshot;
 	int			ntup;
 
-	hscan->rs_cindex = 0;
-	hscan->rs_ntuples = 0;
+	Assert(hscan->pgsr);
 
 	/*
-	 * Ignore any claimed entries past what we think is the end of the
-	 * relation. It may have been extended after the start of our scan (we
-	 * only hold an AccessShareLock, and it could be inserts from this
-	 * backend).  We don't take this optimization in SERIALIZABLE isolation
-	 * though, as we need to examine all invisible tuples reachable by the
-	 * index.
+	 * Release the buffer containing the previous block and reset the per-page
+	 * counters. Reset hscan->rs_ntuples here just to be safe.
 	 */
-	if (!IsolationIsSerializable() && block >= hscan->rs_nblocks)
-		return false;
+	if (BufferIsValid(hscan->rs_cbuf))
+	{
+		ReleaseBuffer(hscan->rs_cbuf);
+		hscan->rs_cbuf = InvalidBuffer;
+	}
+
+	hscan->rs_cindex = 0;
+	hscan->rs_ntuples = 0;
+
+	hscan->rs_cbuf = pg_streaming_read_buffer_get_next(hscan->pgsr, &io_private);
+
+	if (BufferIsInvalid(hscan->rs_cbuf))
+	{
+		if (BufferIsValid(hscan->vmbuffer))
+		{
+			ReleaseBuffer(hscan->vmbuffer);
+			hscan->vmbuffer = InvalidBuffer;
+		}
+
+		return hscan->empty_tuples > 0;
+	}
+
+	Assert(io_private);
+
+	tbmres = (TBMIterateResult *) io_private;
+
+	Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
+
+	*recheck = tbmres->recheck;
+
+	hscan->rs_cblock = tbmres->blockno;
+	hscan->rs_ntuples = tbmres->ntuples;
 
-	/*
-	 * Acquire pin on the target heap page, trading in any pin we held before.
-	 */
-	hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
-										  scan->rs_rd,
-										  block);
-	hscan->rs_cblock = block;
 	buffer = hscan->rs_cbuf;
 	snapshot = scan->rs_snapshot;
 
@@ -2177,7 +2195,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 			ItemPointerData tid;
 			HeapTupleData heapTuple;
 
-			ItemPointerSet(&tid, block, offnum);
+			ItemPointerSet(&tid, tbmres->blockno, offnum);
 			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
 									   &heapTuple, NULL, true))
 				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
@@ -2205,7 +2223,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
 			loctup.t_len = ItemIdGetLength(lp);
 			loctup.t_tableOid = scan->rs_rd->rd_id;
-			ItemPointerSet(&loctup.t_self, block, offnum);
+			ItemPointerSet(&loctup.t_self, tbmres->blockno, offnum);
 			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
 			if (valid)
 			{
@@ -2223,25 +2241,31 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	Assert(ntup <= MaxHeapTuplesPerPage);
 	hscan->rs_ntuples = ntup;
 
-	return ntup > 0;
+	return true;
 }
 
 static bool
-heapam_scan_bitmap_next_tuple(TableScanDesc scan,
-							  TBMIterateResult *tbmres,
-							  TupleTableSlot *slot)
+heapam_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
 	OffsetNumber targoffset;
 	Page		page;
 	ItemId		lp;
 
+	if (hscan->empty_tuples)
+	{
+		ExecStoreAllNullTuple(slot);
+		hscan->empty_tuples--;
+		return true;
+	}
+
 	/*
 	 * Out of range?  If so, nothing more to look at on this page
 	 */
 	if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
 		return false;
 
+	Assert(BufferIsValid(hscan->rs_cbuf));
 	targoffset = hscan->rs_vistuples[hscan->rs_cindex];
 	page = BufferGetPage(hscan->rs_cbuf);
 	lp = PageGetItemId(page, targoffset);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 3d590a6b9f..7257308195 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "access/relscan.h"
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/createas.h"
@@ -112,7 +113,7 @@ static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
-static void show_tidbitmap_info(BitmapHeapScanState *planstate,
+static void show_tidbitmap_info(TableScanDesc tdesc,
 								ExplainState *es);
 static void show_instrumentation_count(const char *qlabel, int which,
 									   PlanState *planstate, ExplainState *es);
@@ -1827,7 +1828,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
 			if (es->analyze)
-				show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+				show_tidbitmap_info(((BitmapHeapScanState *) planstate)->ss.ss_currentScanDesc, es);
 			break;
 		case T_SampleScan:
 			show_tablesample(((SampleScan *) plan)->tablesample,
@@ -3416,25 +3417,25 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
  * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
  */
 static void
-show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
+show_tidbitmap_info(TableScanDesc tdesc, ExplainState *es)
 {
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		ExplainPropertyInteger("Exact Heap Blocks", NULL,
-							   planstate->exact_pages, es);
+							   tdesc->exact_pages, es);
 		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
-							   planstate->lossy_pages, es);
+							   tdesc->lossy_pages, es);
 	}
 	else
 	{
-		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+		if (tdesc->exact_pages > 0 || tdesc->lossy_pages > 0)
 		{
 			ExplainIndentText(es);
 			appendStringInfoString(es->str, "Heap Blocks:");
-			if (planstate->exact_pages > 0)
-				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
-			if (planstate->lossy_pages > 0)
-				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
+			if (tdesc->exact_pages > 0)
+				appendStringInfo(es->str, " exact=%ld", tdesc->exact_pages);
+			if (tdesc->lossy_pages > 0)
+				appendStringInfo(es->str, " lossy=%ld", tdesc->lossy_pages);
 			appendStringInfoChar(es->str, '\n');
 		}
 	}
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index c1e81ebed6..d94ded30f2 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -54,11 +54,6 @@
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-												TBMIterateResult *tbmres);
-static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
-static inline void BitmapPrefetch(BitmapHeapScanState *node,
-								  TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
 
 
@@ -74,10 +69,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	ExprContext *econtext;
 	TableScanDesc scan;
 	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator = NULL;
-	TBMSharedIterator *shared_tbmiterator = NULL;
-	TBMIterateResult *tbmres;
-	TupleTableSlot *slot;
+	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
 	ParallelBitmapHeapState *pstate = node->pstate;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
 
@@ -88,23 +80,10 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	slot = node->ss.ss_ScanTupleSlot;
 	scan = node->ss.ss_currentScanDesc;
 	tbm = node->tbm;
-	if (pstate == NULL)
-		tbmiterator = node->tbmiterator;
-	else
-		shared_tbmiterator = node->shared_tbmiterator;
-	tbmres = node->tbmres;
 
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
 	 * the iteration over the bitmap.
-	 *
-	 * For prefetching, we use *two* iterators, one for the pages we are
-	 * actually scanning and another that runs ahead of the first for
-	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
-	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
-	 * desired prefetch distance, which starts small and increases up to the
-	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
-	 * a scan that stops after a few tuples because of a LIMIT.
 	 */
 	if (!node->initialized)
 	{
@@ -116,17 +95,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				elog(ERROR, "unrecognized result from subplan");
 
 			node->tbm = tbm;
-			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
-			node->tbmres = tbmres = NULL;
-
-#ifdef USE_PREFETCH
-			if (node->prefetch_maximum > 0)
-			{
-				node->prefetch_iterator = tbm_begin_iterate(tbm);
-				node->prefetch_pages = 0;
-				node->prefetch_target = -1;
-			}
-#endif							/* USE_PREFETCH */
+			scan->tbmiterator = tbm_begin_iterate(tbm);
 		}
 		else
 		{
@@ -148,194 +117,59 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				 * dsa_pointer of the iterator state which will be used by
 				 * multiple processes to iterate jointly.
 				 */
-				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
-#ifdef USE_PREFETCH
-				if (node->prefetch_maximum > 0)
-				{
-					pstate->prefetch_iterator =
-						tbm_prepare_shared_iterate(tbm);
-
-					/*
-					 * We don't need the mutex here as we haven't yet woke up
-					 * others.
-					 */
-					pstate->prefetch_pages = 0;
-					pstate->prefetch_target = -1;
-				}
-#endif
+				pstate->tbmiterator =
+					tbm_prepare_shared_iterate(tbm);
 
 				/* We have initialized the shared state so wake up others. */
 				BitmapDoneInitializingSharedState(pstate);
 			}
 
 			/* Allocate a private iterator and attach the shared state to it */
-			node->shared_tbmiterator = shared_tbmiterator =
+			scan->shared_tbmiterator =
 				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
-			node->tbmres = tbmres = NULL;
-
-#ifdef USE_PREFETCH
-			if (node->prefetch_maximum > 0)
-			{
-				node->shared_prefetch_iterator =
-					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
-			}
-#endif							/* USE_PREFETCH */
 		}
 		node->initialized = true;
+
+		/* Get the first block. if none, end of scan */
+		if (!table_scan_bitmap_next_block(scan, &node->recheck))
+			goto exit;
 	}
 
-	for (;;)
+	do
 	{
-		bool		skip_fetch;
-
-		CHECK_FOR_INTERRUPTS();
-
-		/*
-		 * Get next page of results if needed
-		 */
-		if (tbmres == NULL)
-		{
-			if (!pstate)
-				node->tbmres = tbmres = tbm_iterate(tbmiterator);
-			else
-				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
-			if (tbmres == NULL)
-			{
-				/* no more entries in the bitmap */
-				break;
-			}
-
-			BitmapAdjustPrefetchIterator(node, tbmres);
-
-			/*
-			 * We can skip fetching the heap page if we don't need any fields
-			 * from the heap, and the bitmap entries don't need rechecking,
-			 * and all tuples on the page are visible to our transaction.
-			 *
-			 * XXX: It's a layering violation that we do these checks above
-			 * tableam, they should probably moved below it at some point.
-			 */
-			skip_fetch = (node->can_skip_fetch &&
-						  !tbmres->recheck &&
-						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-										 tbmres->blockno,
-										 &node->vmbuffer));
-
-			if (skip_fetch)
-			{
-				/* can't be lossy in the skip_fetch case */
-				Assert(tbmres->ntuples >= 0);
-
-				/*
-				 * The number of tuples on this page is put into
-				 * node->return_empty_tuples.
-				 */
-				node->return_empty_tuples = tbmres->ntuples;
-			}
-			else if (!table_scan_bitmap_next_block(scan, tbmres))
-			{
-				/* AM doesn't think this block is valid, skip */
-				continue;
-			}
-
-			if (tbmres->ntuples >= 0)
-				node->exact_pages++;
-			else
-				node->lossy_pages++;
-
-			/* Adjust the prefetch target */
-			BitmapAdjustPrefetchTarget(node);
-		}
-		else
+		while (table_scan_bitmap_next_tuple(scan, slot))
 		{
-			/*
-			 * Continuing in previously obtained page.
-			 */
-
-#ifdef USE_PREFETCH
-
-			/*
-			 * Try to prefetch at least a few pages even before we get to the
-			 * second page if we don't stop reading after the first tuple.
-			 */
-			if (!pstate)
-			{
-				if (node->prefetch_target < node->prefetch_maximum)
-					node->prefetch_target++;
-			}
-			else if (pstate->prefetch_target < node->prefetch_maximum)
-			{
-				/* take spinlock while updating shared state */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_target < node->prefetch_maximum)
-					pstate->prefetch_target++;
-				SpinLockRelease(&pstate->mutex);
-			}
-#endif							/* USE_PREFETCH */
-		}
+			CHECK_FOR_INTERRUPTS();
 
-		/*
-		 * We issue prefetch requests *after* fetching the current page to try
-		 * to avoid having prefetching interfere with the main I/O. Also, this
-		 * should happen only when we have determined there is still something
-		 * to do on the current page, else we may uselessly prefetch the same
-		 * page we are just about to request for real.
-		 *
-		 * XXX: It's a layering violation that we do these checks above
-		 * tableam, they should probably moved below it at some point.
-		 */
-		BitmapPrefetch(node, scan);
-
-		if (node->return_empty_tuples > 0)
-		{
-			/*
-			 * If we don't have to fetch the tuple, just return nulls.
-			 */
-			ExecStoreAllNullTuple(slot);
+			if (!node->recheck)
+				return slot;
 
-			if (--node->return_empty_tuples == 0)
-			{
-				/* no more tuples to return in the next round */
-				node->tbmres = tbmres = NULL;
-			}
-		}
-		else
-		{
-			/*
-			 * Attempt to fetch tuple from AM.
-			 */
-			if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
-			{
-				/* nothing more to look at on this page */
-				node->tbmres = tbmres = NULL;
-				continue;
-			}
+			econtext->ecxt_scantuple = slot;
+			if (ExecQualAndReset(node->bitmapqualorig, econtext))
+				return slot;
 
-			/*
-			 * If we are using lossy info, we have to recheck the qual
-			 * conditions at every tuple.
-			 */
-			if (tbmres->recheck)
-			{
-				econtext->ecxt_scantuple = slot;
-				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-				{
-					/* Fails recheck, so drop it and loop back for another */
-					InstrCountFiltered2(node, 1);
-					ExecClearTuple(slot);
-					continue;
-				}
-			}
+			/* Fails recheck, so drop it and loop back for another */
+			InstrCountFiltered2(node, 1);
+			ExecClearTuple(slot);
 		}
+	} while (table_scan_bitmap_next_block(scan, &node->recheck));
 
-		/* OK to return this tuple */
-		return slot;
-	}
+exit:
 
 	/*
-	 * if we get here it means we are at the end of the scan..
+	 * Release iterator
 	 */
-	return ExecClearTuple(slot);
+	if (scan->tbmiterator)
+	{
+		tbm_end_iterate(scan->tbmiterator);
+		scan->tbmiterator = NULL;
+	}
+	else if (scan->shared_tbmiterator)
+	{
+		tbm_end_shared_iterate(scan->shared_tbmiterator);
+		scan->shared_tbmiterator = NULL;
+	}
+	return NULL;
 }
 
 /*
@@ -353,215 +187,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 	ConditionVariableBroadcast(&pstate->cv);
 }
 
-/*
- *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
- */
-static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
-							 TBMIterateResult *tbmres)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		TBMIterator *prefetch_iterator = node->prefetch_iterator;
-
-		if (node->prefetch_pages > 0)
-		{
-			/* The main iterator has closed the distance by one page */
-			node->prefetch_pages--;
-		}
-		else if (prefetch_iterator)
-		{
-			/* Do not let the prefetch iterator get behind the main one */
-			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-
-			if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
-				elog(ERROR, "prefetch and main iterators are out of sync");
-		}
-		return;
-	}
-
-	if (node->prefetch_maximum > 0)
-	{
-		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
-
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_pages > 0)
-		{
-			pstate->prefetch_pages--;
-			SpinLockRelease(&pstate->mutex);
-		}
-		else
-		{
-			/* Release the mutex before iterating */
-			SpinLockRelease(&pstate->mutex);
-
-			/*
-			 * In case of shared mode, we can not ensure that the current
-			 * blockno of the main iterator and that of the prefetch iterator
-			 * are same.  It's possible that whatever blockno we are
-			 * prefetching will be processed by another process.  Therefore,
-			 * we don't validate the blockno here as we do in non-parallel
-			 * case.
-			 */
-			if (prefetch_iterator)
-				tbm_shared_iterate(prefetch_iterator);
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapAdjustPrefetchTarget - Adjust the prefetch target
- *
- * Increase prefetch target if it's not yet at the max.  Note that
- * we will increase it to zero after fetching the very first
- * page/tuple, then to one after the second tuple is fetched, then
- * it doubles as later pages are fetched.
- */
-static inline void
-BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (node->prefetch_target >= node->prefetch_maximum / 2)
-			node->prefetch_target = node->prefetch_maximum;
-		else if (node->prefetch_target > 0)
-			node->prefetch_target *= 2;
-		else
-			node->prefetch_target++;
-		return;
-	}
-
-	/* Do an unlocked check first to save spinlock acquisitions. */
-	if (pstate->prefetch_target < node->prefetch_maximum)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
-			pstate->prefetch_target = node->prefetch_maximum;
-		else if (pstate->prefetch_target > 0)
-			pstate->prefetch_target *= 2;
-		else
-			pstate->prefetch_target++;
-		SpinLockRelease(&pstate->mutex);
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
- */
-static inline void
-BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		TBMIterator *prefetch_iterator = node->prefetch_iterator;
-
-		if (prefetch_iterator)
-		{
-			while (node->prefetch_pages < node->prefetch_target)
-			{
-				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-				bool		skip_fetch;
-
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					tbm_end_iterate(prefetch_iterator);
-					node->prefetch_iterator = NULL;
-					break;
-				}
-				node->prefetch_pages++;
-
-				/*
-				 * If we expect not to have to actually read this heap page,
-				 * skip this prefetch call, but continue to run the prefetch
-				 * logic normally.  (Would it be better not to increment
-				 * prefetch_pages?)
-				 *
-				 * This depends on the assumption that the index AM will
-				 * report the same recheck flag for this future heap page as
-				 * it did for the current heap page; which is not a certainty
-				 * but is true in many cases.
-				 */
-				skip_fetch = (node->can_skip_fetch &&
-							  (node->tbmres ? !node->tbmres->recheck : false) &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-
-		return;
-	}
-
-	if (pstate->prefetch_pages < pstate->prefetch_target)
-	{
-		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
-
-		if (prefetch_iterator)
-		{
-			while (1)
-			{
-				TBMIterateResult *tbmpre;
-				bool		do_prefetch = false;
-				bool		skip_fetch;
-
-				/*
-				 * Recheck under the mutex. If some other process has already
-				 * done enough prefetching then we need not to do anything.
-				 */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_pages < pstate->prefetch_target)
-				{
-					pstate->prefetch_pages++;
-					do_prefetch = true;
-				}
-				SpinLockRelease(&pstate->mutex);
-
-				if (!do_prefetch)
-					return;
-
-				tbmpre = tbm_shared_iterate(prefetch_iterator);
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					tbm_end_shared_iterate(prefetch_iterator);
-					node->shared_prefetch_iterator = NULL;
-					break;
-				}
-
-				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (node->can_skip_fetch &&
-							  (node->tbmres ? !node->tbmres->recheck : false) &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
 /*
  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
  */
@@ -607,29 +232,10 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	table_rescan(node->ss.ss_currentScanDesc, NULL);
 
 	/* release bitmaps and buffers if any */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
-	if (node->prefetch_iterator)
-		tbm_end_iterate(node->prefetch_iterator);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
-	if (node->shared_prefetch_iterator)
-		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
-	node->tbmiterator = NULL;
-	node->tbmres = NULL;
-	node->prefetch_iterator = NULL;
 	node->initialized = false;
-	node->shared_tbmiterator = NULL;
-	node->shared_prefetch_iterator = NULL;
-	node->vmbuffer = InvalidBuffer;
-	node->pvmbuffer = InvalidBuffer;
 
 	ExecScanReScan(&node->ss);
 
@@ -663,20 +269,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 	/*
 	 * release bitmaps and buffers if any
 	 */
-	if (node->tbmiterator)
-		tbm_end_iterate(node->tbmiterator);
-	if (node->prefetch_iterator)
-		tbm_end_iterate(node->prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->shared_tbmiterator)
-		tbm_end_shared_iterate(node->shared_tbmiterator);
-	if (node->shared_prefetch_iterator)
-		tbm_end_shared_iterate(node->shared_prefetch_iterator);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 
 	/*
 	 * close heap scan
@@ -695,6 +289,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 {
 	BitmapHeapScanState *scanstate;
 	Relation	currentRelation;
+	bool		can_skip_fetch;
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -714,32 +309,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
 	scanstate->tbm = NULL;
-	scanstate->tbmiterator = NULL;
-	scanstate->tbmres = NULL;
-	scanstate->return_empty_tuples = 0;
-	scanstate->vmbuffer = InvalidBuffer;
-	scanstate->pvmbuffer = InvalidBuffer;
-	scanstate->exact_pages = 0;
-	scanstate->lossy_pages = 0;
-	scanstate->prefetch_iterator = NULL;
-	scanstate->prefetch_pages = 0;
-	scanstate->prefetch_target = 0;
 	scanstate->pscan_len = 0;
 	scanstate->initialized = false;
-	scanstate->shared_tbmiterator = NULL;
-	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
 
-	/*
-	 * We can potentially skip fetching heap pages if we do not need any
-	 * columns of the table, either for checking non-indexable quals or for
-	 * returning data.  This test is a bit simplistic, as it checks the
-	 * stronger condition that there's no qual or return tlist at all.  But in
-	 * most cases it's probably not worth working harder than that.
-	 */
-	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
-								 node->scan.plan.targetlist == NIL);
-
 	/*
 	 * Miscellaneous initialization
 	 *
@@ -778,19 +351,22 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->bitmapqualorig =
 		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
 
+	scanstate->ss.ss_currentRelation = currentRelation;
+
 	/*
-	 * Maximum number of prefetches for the tablespace if configured,
-	 * otherwise the current value of the effective_io_concurrency GUC.
+	 * We can potentially skip fetching heap pages if we do not need any
+	 * columns of the table, either for checking non-indexable quals or for
+	 * returning data.  This test is a bit simplistic, as it checks the
+	 * stronger condition that there's no qual or return tlist at all.  But in
+	 * most cases it's probably not worth working harder than that.
 	 */
-	scanstate->prefetch_maximum =
-		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
-
-	scanstate->ss.ss_currentRelation = currentRelation;
+	can_skip_fetch = (node->scan.plan.qual == NIL &&
+					  node->scan.plan.targetlist == NIL);
 
 	scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
 														  estate->es_snapshot,
 														  0,
-														  NULL);
+														  NULL, can_skip_fetch);
 
 	/*
 	 * all done.
@@ -875,12 +451,9 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 	pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
 
 	pstate->tbmiterator = 0;
-	pstate->prefetch_iterator = 0;
 
 	/* Initialize the mutex */
 	SpinLockInit(&pstate->mutex);
-	pstate->prefetch_pages = 0;
-	pstate->prefetch_target = 0;
 	pstate->state = BM_INITIAL;
 
 	ConditionVariableInit(&pstate->cv);
@@ -912,11 +485,7 @@ ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 	if (DsaPointerIsValid(pstate->tbmiterator))
 		tbm_free_shared_area(dsa, pstate->tbmiterator);
 
-	if (DsaPointerIsValid(pstate->prefetch_iterator))
-		tbm_free_shared_area(dsa, pstate->prefetch_iterator);
-
 	pstate->tbmiterator = InvalidDsaPointer;
-	pstate->prefetch_iterator = InvalidDsaPointer;
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 0f4850065f..c0f337c265 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -180,7 +180,6 @@ struct TBMIterator
 	int			spageptr;		/* next spages index */
 	int			schunkptr;		/* next schunks index */
 	int			schunkbit;		/* next bit to check in current schunk */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
 };
 
 /*
@@ -221,7 +220,6 @@ struct TBMSharedIterator
 	PTEntryArray *ptbase;		/* pagetable element array */
 	PTIterationArray *ptpages;	/* sorted exact page index list */
 	PTIterationArray *ptchunks; /* sorted lossy page index list */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
 };
 
 /* Local function prototypes */
@@ -695,8 +693,7 @@ tbm_begin_iterate(TIDBitmap *tbm)
 	 * Create the TBMIterator struct, with enough trailing space to serve the
 	 * needs of the TBMIterateResult sub-struct.
 	 */
-	iterator = (TBMIterator *) palloc(sizeof(TBMIterator) +
-									  MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
+	iterator = (TBMIterator *) palloc(sizeof(TBMIterator));
 	iterator->tbm = tbm;
 
 	/*
@@ -957,20 +954,21 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
 /*
  * tbm_iterate - scan through next page of a TIDBitmap
  *
- * Returns a TBMIterateResult representing one page, or NULL if there are
- * no more pages to scan.  Pages are guaranteed to be delivered in numerical
- * order.  If result->ntuples < 0, then the bitmap is "lossy" and failed to
- * remember the exact tuples to look at on this page --- the caller must
- * examine all tuples on the page and check if they meet the intended
- * condition.  If result->recheck is true, only the indicated tuples need
- * be examined, but the condition must be rechecked anyway.  (For ease of
- * testing, recheck is always set true when ntuples < 0.)
+ * Caller must pass in a TBMIterateResult to be filled.
+ *
+ * Pages are guaranteed to be delivered in numerical order.  tbmres->blockno is
+ * set to InvalidBlockNumber when there are no more pages to scan. If
+ * tbmres->ntuples < 0, then the bitmap is "lossy" and failed to remember the
+ * exact tuples to look at on this page --- the caller must examine all tuples
+ * on the page and check if they meet the intended condition.  If
+ * tbmres->recheck is true, only the indicated tuples need be examined, but the
+ * condition must be rechecked anyway.  (For ease of testing, recheck is always
+ * set true when ntuples < 0.)
  */
-TBMIterateResult *
-tbm_iterate(TBMIterator *iterator)
+void
+tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres)
 {
 	TIDBitmap  *tbm = iterator->tbm;
-	TBMIterateResult *output = &(iterator->output);
 
 	Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
 
@@ -998,6 +996,7 @@ tbm_iterate(TBMIterator *iterator)
 	 * If both chunk and per-page data remain, must output the numerically
 	 * earlier page.
 	 */
+	Assert(tbmres);
 	if (iterator->schunkptr < tbm->nchunks)
 	{
 		PagetableEntry *chunk = tbm->schunks[iterator->schunkptr];
@@ -1008,11 +1007,11 @@ tbm_iterate(TBMIterator *iterator)
 			chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
 		{
 			/* Return a lossy page indicator from the chunk */
-			output->blockno = chunk_blockno;
-			output->ntuples = -1;
-			output->recheck = true;
+			tbmres->blockno = chunk_blockno;
+			tbmres->ntuples = -1;
+			tbmres->recheck = true;
 			iterator->schunkbit++;
-			return output;
+			return;
 		}
 	}
 
@@ -1028,16 +1027,17 @@ tbm_iterate(TBMIterator *iterator)
 			page = tbm->spages[iterator->spageptr];
 
 		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
-		output->blockno = page->blockno;
-		output->ntuples = ntuples;
-		output->recheck = page->recheck;
+		ntuples = tbm_extract_page_tuple(page, tbmres);
+		tbmres->blockno = page->blockno;
+		tbmres->ntuples = ntuples;
+		tbmres->recheck = page->recheck;
 		iterator->spageptr++;
-		return output;
+		return;
 	}
 
 	/* Nothing more in the bitmap */
-	return NULL;
+	tbmres->blockno = InvalidBlockNumber;
+	return;
 }
 
 /*
@@ -1047,10 +1047,9 @@ tbm_iterate(TBMIterator *iterator)
  *	across multiple processes.  We need to acquire the iterator LWLock,
  *	before accessing the shared members.
  */
-TBMIterateResult *
-tbm_shared_iterate(TBMSharedIterator *iterator)
+void
+tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres)
 {
-	TBMIterateResult *output = &iterator->output;
 	TBMSharedIteratorState *istate = iterator->state;
 	PagetableEntry *ptbase = NULL;
 	int		   *idxpages = NULL;
@@ -1101,13 +1100,13 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 			chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
 		{
 			/* Return a lossy page indicator from the chunk */
-			output->blockno = chunk_blockno;
-			output->ntuples = -1;
-			output->recheck = true;
+			tbmres->blockno = chunk_blockno;
+			tbmres->ntuples = -1;
+			tbmres->recheck = true;
 			istate->schunkbit++;
 
 			LWLockRelease(&istate->lock);
-			return output;
+			return;
 		}
 	}
 
@@ -1117,21 +1116,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 		int			ntuples;
 
 		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
-		output->blockno = page->blockno;
-		output->ntuples = ntuples;
-		output->recheck = page->recheck;
+		ntuples = tbm_extract_page_tuple(page, tbmres);
+		tbmres->blockno = page->blockno;
+		tbmres->ntuples = ntuples;
+		tbmres->recheck = page->recheck;
 		istate->spageptr++;
 
 		LWLockRelease(&istate->lock);
 
-		return output;
+		return;
 	}
 
 	LWLockRelease(&istate->lock);
 
 	/* Nothing more in the bitmap */
-	return NULL;
+	tbmres->blockno = InvalidBlockNumber;
+	return;
 }
 
 /*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index bc53f9f4d2..fdbbeb17c7 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -20,6 +20,7 @@
 #include "access/skey.h"
 #include "access/table.h"		/* for backward compatibility */
 #include "access/tableam.h"
+#include "nodes/execnodes.h"
 #include "nodes/lockoptions.h"
 #include "nodes/primnodes.h"
 #include "storage/bufpage.h"
@@ -76,9 +77,15 @@ typedef struct HeapScanDescData
 	struct PgStreamingRead *pgsr;
 
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
+
+	/*
+	 * MFIXME: not sure if vmbuffer is being released in the right place.
+	 */
+	Buffer		vmbuffer;		/* current VM buffer for BHS */
 	int			rs_cindex;		/* current tuple's index in vistuples */
 	int			rs_ntuples;		/* number of visible tuples on page */
 	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
+	int			empty_tuples;
 }			HeapScanDescData;
 typedef struct HeapScanDescData *HeapScanDesc;
 
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 521043304a..cdb44316a3 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -16,6 +16,7 @@
 
 #include "access/htup_details.h"
 #include "access/itup.h"
+#include "nodes/tidbitmap.h"
 #include "port/atomics.h"
 #include "storage/buf.h"
 #include "storage/spin.h"
@@ -40,6 +41,13 @@ typedef struct TableScanDescData
 	ItemPointerData rs_mintid;
 	ItemPointerData rs_maxtid;
 
+	/* Only used for Bitmap table scans */
+	TBMIterator *tbmiterator;
+	TBMSharedIterator *shared_tbmiterator;
+	long		exact_pages;
+	long		lossy_pages;
+	int			empty_tuples;
+
 	/*
 	 * Information about type and behaviour of the scan, a bitmask of members
 	 * of the ScanOptions enum (see tableam.h).
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 5f8474871d..b5626805a6 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -32,6 +32,7 @@ extern PGDLLIMPORT char *default_table_access_method;
 extern PGDLLIMPORT bool synchronize_seqscans;
 
 
+struct BitmapHeapScanState;
 struct BulkInsertStateData;
 struct IndexInfo;
 struct SampleScanState;
@@ -62,7 +63,8 @@ typedef enum ScanOptions
 
 	/* unregister snapshot at scan end? */
 	SO_TEMP_SNAPSHOT = 1 << 9,
-}			ScanOptions;
+	SO_CAN_SKIP_FETCH = 1 << 10
+} ScanOptions;
 
 /*
  * Result codes for table_{update,delete,lock_tuple}, and for visibility
@@ -773,53 +775,36 @@ typedef struct TableAmRoutine
 	 */
 
 	/*
-	 * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
-	 * of a bitmap table scan. `scan` was started via table_beginscan_bm().
-	 * Return false if there are no tuples to be found on the page, true
-	 * otherwise.
+	 * Prepare to fetch / check / return tuples obtained from the streaming
+	 * read helper as part of a bitmap table scan. `scan` was started via
+	 * table_beginscan_bm(). Return false if the relation is exhausted and
+	 * true otherwise.
 	 *
 	 * This will typically read and pin the target block, and do the necessary
 	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
 	 * make sense to perform tuple visibility checks at this time). For some
-	 * AMs it will make more sense to do all the work referencing `tbmres`
-	 * contents here, for others it might be better to defer more work to
-	 * scan_bitmap_next_tuple.
+	 * AMs, it might be better to defer more work to scan_bitmap_next_tuple().
 	 *
-	 * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
-	 * on the page have to be returned, otherwise the tuples at offsets in
-	 * `tbmres->offsets` need to be returned.
+	 * After examining the page from the TBMIterateResult returned by the
+	 * streaming read helper, most AMs will set relevant fields in the `scan`
+	 * for the benefit of scan_bitmap_next_tuple(). All AMs must set
+	 * `recheck`, passed in by the caller, based on the value in the
+	 * TBMIterateResult so that BitmapHeapNext() can determine whether or not
+	 * to yield tuples.
 	 *
-	 * XXX: Currently this may only be implemented if the AM uses md.c as its
-	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
-	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
-	 * performs prefetching directly using that interface.  This probably
-	 * needs to be rectified at a later point.
-	 *
-	 * XXX: Currently this may only be implemented if the AM uses the
-	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
-	 * perform prefetching.  This probably needs to be rectified at a later
-	 * point.
-	 *
-	 * Optional callback, but either both scan_bitmap_next_block and
-	 * scan_bitmap_next_tuple need to exist, or neither.
+	 * Optional callback, but scan_bitmap_setup, scan_bitmap_next_block, and
+	 * scan_bitmap_next_tuple need to exist, or none of them.
 	 */
-	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
-										   struct TBMIterateResult *tbmres);
+	bool		(*scan_bitmap_next_block) (TableScanDesc scan, bool *recheck);
 
 	/*
 	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
 	 * if a visible tuple was found, false otherwise.
 	 *
-	 * For some AMs it will make more sense to do all the work referencing
-	 * `tbmres` contents in scan_bitmap_next_block, for others it might be
-	 * better to defer more work to this callback.
-	 *
-	 * Optional callback, but either both scan_bitmap_next_block and
-	 * scan_bitmap_next_tuple need to exist, or neither.
+	 * Optional callback, but scan_bitmap_setup, scan_bitmap_next_block, and
+	 * scan_bitmap_next_tuple need to exist, or none of them.
 	 */
-	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
-										   struct TBMIterateResult *tbmres,
-										   TupleTableSlot *slot);
+	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan, TupleTableSlot *slot);
 
 	/*
 	 * Prepare to fetch tuples from the next block in a sample scan. Return
@@ -944,10 +929,13 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
-				   int nkeys, struct ScanKeyData *key)
+				   int nkeys, struct ScanKeyData *key, bool can_skip_fetch)
 {
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
+	if (can_skip_fetch)
+		flags |= SO_CAN_SKIP_FETCH;
+
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
@@ -1955,8 +1943,7 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
  * used after verifying the presence (at plan time or such).
  */
 static inline bool
-table_scan_bitmap_next_block(TableScanDesc scan,
-							 struct TBMIterateResult *tbmres)
+table_scan_bitmap_next_block(TableScanDesc scan, bool *recheck)
 {
 	/*
 	 * We don't expect direct calls to table_scan_bitmap_next_block with valid
@@ -1966,8 +1953,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
 	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
 		elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
 
-	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
-														   tbmres);
+	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan, recheck);
 }
 
 /*
@@ -1979,9 +1965,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
  * returned false.
  */
 static inline bool
-table_scan_bitmap_next_tuple(TableScanDesc scan,
-							 struct TBMIterateResult *tbmres,
-							 TupleTableSlot *slot)
+table_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot)
 {
 	/*
 	 * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
@@ -1992,7 +1976,6 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
 		elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
-														   tbmres,
 														   slot);
 }
 
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index ea003a9caa..39ae7b29ef 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 										  ParallelContext *pcxt);
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 										   ParallelWorkerContext *pwcxt);
+extern void table_bitmap_scan_setup(BitmapHeapScanState *scanstate);
 
 #endif							/* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 561fdd98f1..535dfef6f0 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1675,11 +1675,7 @@ typedef enum
 /* ----------------
  *	 ParallelBitmapHeapState information
  *		tbmiterator				iterator for scanning current pages
- *		prefetch_iterator		iterator for prefetching ahead of current page
- *		mutex					mutual exclusion for the prefetching variable
- *								and state
- *		prefetch_pages			# pages prefetch iterator is ahead of current
- *		prefetch_target			current target prefetch distance
+ *		mutex					mutual exclusion for the state
  *		state					current state of the TIDBitmap
  *		cv						conditional wait variable
  *		phs_snapshot_data		snapshot data shared to workers
@@ -1688,10 +1684,7 @@ typedef enum
 typedef struct ParallelBitmapHeapState
 {
 	dsa_pointer tbmiterator;
-	dsa_pointer prefetch_iterator;
 	slock_t		mutex;
-	int			prefetch_pages;
-	int			prefetch_target;
 	SharedBitmapState state;
 	ConditionVariable cv;
 	char		phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
@@ -1702,22 +1695,9 @@ typedef struct ParallelBitmapHeapState
  *
  *		bitmapqualorig	   execution state for bitmapqualorig expressions
  *		tbm				   bitmap obtained from child index scan(s)
- *		tbmiterator		   iterator for scanning current pages
- *		tbmres			   current-page data
- *		can_skip_fetch	   can we potentially skip tuple fetches in this scan?
- *		return_empty_tuples number of empty tuples to return
- *		vmbuffer		   buffer for visibility-map lookups
- *		pvmbuffer		   ditto, for prefetched pages
- *		exact_pages		   total number of exact pages retrieved
- *		lossy_pages		   total number of lossy pages retrieved
- *		prefetch_iterator  iterator for prefetching ahead of current page
- *		prefetch_pages	   # pages prefetch iterator is ahead of current
- *		prefetch_target    current target prefetch distance
- *		prefetch_maximum   maximum value for prefetch_target
+ *		recheck			   whether or not the tuple needs to be rechecked before returning
  *		pscan_len		   size of the shared memory for parallel bitmap
  *		initialized		   is node is ready to iterate
- *		shared_tbmiterator	   shared iterator
- *		shared_prefetch_iterator shared iterator for prefetching
  *		pstate			   shared state for parallel bitmap scan
  * ----------------
  */
@@ -1726,22 +1706,9 @@ typedef struct BitmapHeapScanState
 	ScanState	ss;				/* its first field is NodeTag */
 	ExprState  *bitmapqualorig;
 	TIDBitmap  *tbm;
-	TBMIterator *tbmiterator;
-	TBMIterateResult *tbmres;
-	bool		can_skip_fetch;
-	int			return_empty_tuples;
-	Buffer		vmbuffer;
-	Buffer		pvmbuffer;
-	long		exact_pages;
-	long		lossy_pages;
-	TBMIterator *prefetch_iterator;
-	int			prefetch_pages;
-	int			prefetch_target;
-	int			prefetch_maximum;
+	bool		recheck;
 	Size		pscan_len;
 	bool		initialized;
-	TBMSharedIterator *shared_tbmiterator;
-	TBMSharedIterator *shared_prefetch_iterator;
 	ParallelBitmapHeapState *pstate;
 } BitmapHeapScanState;
 
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 1945f0639b..97e40097fd 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -64,8 +64,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
 
 extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm);
 extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
-extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
-extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
+extern void tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres);
+extern void tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres);
 extern void tbm_end_iterate(TBMIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
-- 
2.39.2

