From 202b16d3a381210e8dbee69e68a8310be8ee11d2 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 12 Feb 2024 20:15:05 -0500
Subject: [PATCH v1 06/11] Push BitmapHeapScan skip fetch optimization into
 table AM

This resolves the long-standing FIXME in BitmapHeapNext() which said that
the optmization to skip fetching blocks of the underlying table when
none of the column data was needed should be pushed into the table AM
specific code.

heapam_scan_bitmap_next_block() now does the visibility check and
accounting of empty tuples to be returned; while
heapam_scan_bitmap_next_tuple() prepares the slot to return empty
tuples.

The table AM agnostic functions for prefetching still need to know if
skipping fetching is permitted for this scan. However, this dependency
will be removed when that prefetching code is removed in favor of the
upcoming streaming read API.
---
 src/backend/access/heap/heapam.c          |  10 +++
 src/backend/access/heap/heapam_handler.c  |  29 +++++++
 src/backend/executor/nodeBitmapHeapscan.c | 100 ++++++----------------
 src/include/access/heapam.h               |   2 +
 src/include/access/tableam.h              |  17 ++--
 src/include/nodes/execnodes.h             |   6 --
 6 files changed, 74 insertions(+), 90 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 707460a5364..7aae1ecf0a9 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -955,6 +955,8 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
 	scan->rs_strategy = NULL;	/* set in initscan */
+	scan->vmbuffer = InvalidBuffer;
+	scan->empty_tuples = 0;
 
 	/*
 	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
@@ -1043,6 +1045,10 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->vmbuffer))
+		ReleaseBuffer(scan->vmbuffer);
+	scan->vmbuffer = InvalidBuffer;
+
 	/*
 	 * reinitialize scan descriptor
 	 */
@@ -1062,6 +1068,10 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->vmbuffer))
+		ReleaseBuffer(scan->vmbuffer);
+	scan->vmbuffer = InvalidBuffer;
+
 	/*
 	 * decrement relation reference count and free scan descriptor storage
 	 */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 716d477e271..baba09c87c0 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -27,6 +27,7 @@
 #include "access/syncscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/index.h"
@@ -2124,6 +2125,24 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
 
+	/*
+	 * We can skip fetching the heap page if we don't need any fields from the
+	 * heap, and the bitmap entries don't need rechecking, and all tuples on
+	 * the page are visible to our transaction.
+	 */
+	if (scan->rs_flags & SO_CAN_SKIP_FETCH &&
+		!tbmres->recheck &&
+		VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->vmbuffer))
+	{
+		/* can't be lossy in the skip_fetch case */
+		Assert(tbmres->ntuples >= 0);
+		Assert(hscan->empty_tuples >= 0);
+
+		hscan->empty_tuples += tbmres->ntuples;
+
+		return true;
+	}
+
 	/*
 	 * Ignore any claimed entries past what we think is the end of the
 	 * relation. It may have been extended after the start of our scan (we
@@ -2235,6 +2254,16 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 	Page		page;
 	ItemId		lp;
 
+	if (hscan->empty_tuples > 0)
+	{
+		/*
+		 * If we don't have to fetch the tuple, just return nulls.
+		 */
+		ExecStoreAllNullTuple(slot);
+		hscan->empty_tuples--;
+		return true;
+	}
+
 	/*
 	 * Out of range?  If so, nothing more to look at on this page
 	 */
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 9372b49bfaa..c0fb06c9688 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -108,6 +108,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	 */
 	if (!node->initialized)
 	{
+		bool can_skip_fetch;
 		/*
 		 * We can potentially skip fetching heap pages if we do not need any
 		 * columns of the table, either for checking non-indexable quals or
@@ -115,7 +116,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		 * the stronger condition that there's no qual or return tlist at all.
 		 * But in most cases it's probably not worth working harder than that.
 		 */
-		node->can_skip_fetch = (node->ss.ps.plan->qual == NIL &&
+		can_skip_fetch = (node->ss.ps.plan->qual == NIL &&
 								node->ss.ps.plan->targetlist == NIL);
 
 		if (!pstate)
@@ -199,7 +200,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
 																	node->ss.ps.state->es_snapshot,
 																	node->worker_snapshot,
 																	0,
-																	NULL);
+																	NULL,
+																	can_skip_fetch);
 		}
 
 		node->initialized = true;
@@ -207,8 +209,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 	for (;;)
 	{
-		bool		skip_fetch;
-
 		CHECK_FOR_INTERRUPTS();
 
 		/*
@@ -228,32 +228,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 			BitmapAdjustPrefetchIterator(node, tbmres->blockno);
 
-			/*
-			 * We can skip fetching the heap page if we don't need any fields
-			 * from the heap, and the bitmap entries don't need rechecking,
-			 * and all tuples on the page are visible to our transaction.
-			 *
-			 * XXX: It's a layering violation that we do these checks above
-			 * tableam, they should probably moved below it at some point.
-			 */
-			skip_fetch = (node->can_skip_fetch &&
-						  !tbmres->recheck &&
-						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-										 tbmres->blockno,
-										 &node->vmbuffer));
-
-			if (skip_fetch)
-			{
-				/* can't be lossy in the skip_fetch case */
-				Assert(tbmres->ntuples >= 0);
-
-				/*
-				 * The number of tuples on this page is put into
-				 * node->return_empty_tuples.
-				 */
-				node->return_empty_tuples = tbmres->ntuples;
-			}
-			else if (!table_scan_bitmap_next_block(scan, tbmres))
+			if (!table_scan_bitmap_next_block(scan, tbmres))
 			{
 				/* AM doesn't think this block is valid, skip */
 				continue;
@@ -307,46 +282,30 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		 */
 		BitmapPrefetch(node, scan);
 
-		if (node->return_empty_tuples > 0)
+		/*
+		 * Attempt to fetch tuple from AM.
+		 */
+		if (!table_scan_bitmap_next_tuple(scan, slot))
 		{
-			/*
-			 * If we don't have to fetch the tuple, just return nulls.
-			 */
-			ExecStoreAllNullTuple(slot);
-
-			if (--node->return_empty_tuples == 0)
-			{
-				/* no more tuples to return in the next round */
-				node->tbmres = tbmres = NULL;
-			}
+			/* nothing more to look at on this page */
+			node->tbmres = tbmres = NULL;
+			continue;
 		}
-		else
+
+		/*
+		 * If we are using lossy info, we have to recheck the qual conditions
+		 * at every tuple.
+		 */
+		if (tbmres->recheck)
 		{
-			/*
-			 * Attempt to fetch tuple from AM.
-			 */
-			if (!table_scan_bitmap_next_tuple(scan, slot))
+			econtext->ecxt_scantuple = slot;
+			if (!ExecQualAndReset(node->bitmapqualorig, econtext))
 			{
-				/* nothing more to look at on this page */
-				node->tbmres = tbmres = NULL;
+				/* Fails recheck, so drop it and loop back for another */
+				InstrCountFiltered2(node, 1);
+				ExecClearTuple(slot);
 				continue;
 			}
-
-			/*
-			 * If we are using lossy info, we have to recheck the qual
-			 * conditions at every tuple.
-			 */
-			if (tbmres->recheck)
-			{
-				econtext->ecxt_scantuple = slot;
-				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-				{
-					/* Fails recheck, so drop it and loop back for another */
-					InstrCountFiltered2(node, 1);
-					ExecClearTuple(slot);
-					continue;
-				}
-			}
 		}
 
 		/* OK to return this tuple */
@@ -518,7 +477,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 				 * it did for the current heap page; which is not a certainty
 				 * but is true in many cases.
 				 */
-				skip_fetch = (node->can_skip_fetch &&
+
+				skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH &&
 							  !tbmpre->recheck &&
 							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
 											 tbmpre->blockno,
@@ -569,7 +529,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 				}
 
 				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (node->can_skip_fetch &&
+				skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH &&
 							  !tbmpre->recheck &&
 							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
 											 tbmpre->blockno,
@@ -639,8 +599,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
@@ -650,7 +608,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	node->initialized = false;
 	node->shared_tbmiterator = NULL;
 	node->shared_prefetch_iterator = NULL;
-	node->vmbuffer = InvalidBuffer;
 	node->pvmbuffer = InvalidBuffer;
 
 	ExecScanReScan(&node->ss);
@@ -695,8 +652,6 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 
@@ -739,8 +694,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->tbm = NULL;
 	scanstate->tbmiterator = NULL;
 	scanstate->tbmres = NULL;
-	scanstate->return_empty_tuples = 0;
-	scanstate->vmbuffer = InvalidBuffer;
 	scanstate->pvmbuffer = InvalidBuffer;
 	scanstate->exact_pages = 0;
 	scanstate->lossy_pages = 0;
@@ -752,7 +705,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->shared_tbmiterator = NULL;
 	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
-	scanstate->can_skip_fetch = false;
 	scanstate->worker_snapshot = NULL;
 
 	/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4b133f68593..2fc369a18ff 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -73,6 +73,8 @@ typedef struct HeapScanDescData
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
+	Buffer		vmbuffer;		/* for checking if can skip fetch */
+	int			empty_tuples;	/* count of all NULL tuples to be returned */
 	int			rs_cindex;		/* current tuple's index in vistuples */
 	int			rs_ntuples;		/* number of visible tuples on page */
 	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 77f32a7472d..05e700c5055 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -62,6 +62,7 @@ typedef enum ScanOptions
 
 	/* unregister snapshot at scan end? */
 	SO_TEMP_SNAPSHOT = 1 << 9,
+	SO_CAN_SKIP_FETCH = 1 << 10,
 }			ScanOptions;
 
 /*
@@ -780,10 +781,8 @@ typedef struct TableAmRoutine
 	 *
 	 * This will typically read and pin the target block, and do the necessary
 	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
-	 * make sense to perform tuple visibility checks at this time). For some
-	 * AMs it will make more sense to do all the work referencing `tbmres`
-	 * contents here, for others it might be better to defer more work to
-	 * scan_bitmap_next_tuple.
+	 * make sense to perform tuple visibility checks at this time). All work
+	 * referencing `tbmres` must be done here.
 	 *
 	 * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
 	 * on the page have to be returned, otherwise the tuples at offsets in
@@ -795,11 +794,6 @@ typedef struct TableAmRoutine
 	 * performs prefetching directly using that interface.  This probably
 	 * needs to be rectified at a later point.
 	 *
-	 * XXX: Currently this may only be implemented if the AM uses the
-	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
-	 * perform prefetching.  This probably needs to be rectified at a later
-	 * point.
-	 *
 	 * Optional callback, but either both scan_bitmap_next_block and
 	 * scan_bitmap_next_tuple need to exist, or neither.
 	 */
@@ -944,11 +938,14 @@ extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot, Snapshot worker_snapshot,
-				   int nkeys, struct ScanKeyData *key)
+				   int nkeys, struct ScanKeyData *key, bool can_skip_fetch)
 {
 	TableScanDesc result;
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
+	if (can_skip_fetch)
+		flags |= SO_CAN_SKIP_FETCH;
+
 	result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 	if (worker_snapshot)
 		table_scan_update_snapshot(result, worker_snapshot);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 00c75fb10e2..9392923eb32 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1711,9 +1711,6 @@ typedef struct ParallelBitmapHeapState
  *		tbm				   bitmap obtained from child index scan(s)
  *		tbmiterator		   iterator for scanning current pages
  *		tbmres			   current-page data
- *		can_skip_fetch	   can we potentially skip tuple fetches in this scan?
- *		return_empty_tuples number of empty tuples to return
- *		vmbuffer		   buffer for visibility-map lookups
  *		pvmbuffer		   ditto, for prefetched pages
  *		exact_pages		   total number of exact pages retrieved
  *		lossy_pages		   total number of lossy pages retrieved
@@ -1736,9 +1733,6 @@ typedef struct BitmapHeapScanState
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
-	bool		can_skip_fetch;
-	int			return_empty_tuples;
-	Buffer		vmbuffer;
 	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
-- 
2.37.2

