From f1540b476ba02ae962f8d44fce53317219543697 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Sat, 6 Apr 2024 15:46:05 -0400
Subject: [PATCH v19 18/21] Lift and shift BitmapHeapScan prefetch funcs to
 heap AM

We will soon move all of the members used to manage prefetching from
the BitmapHeapScanState to the BitmapHeapScanDesc. As a first step,
move the prefetch functions as-is to heapam_handler.c.
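
The serial-case ramp-up that BitmapAdjustPrefetchTarget implements is
easiest to see in isolation: the target climbs to zero after the first
page, to one after the second, and then doubles until it reaches
prefetch_maximum.  The following standalone sketch (not part of the
patch; the helper name, the -1 starting value, and the maximum of 64
are illustrative assumptions) reproduces that progression:

    #include <stdio.h>

    /* Mirrors the serial branch of BitmapAdjustPrefetchTarget. */
    static int
    adjust_prefetch_target(int target, int maximum)
    {
        if (target >= maximum)
            return target;          /* don't increase any further */
        if (target >= maximum / 2)
            return maximum;
        if (target > 0)
            return target * 2;
        return target + 1;
    }

    int
    main(void)
    {
        int     target = -1;        /* assumed initial value */
        int     maximum = 64;       /* stand-in for prefetch_maximum */

        for (int page = 1; page <= 10; page++)
        {
            target = adjust_prefetch_target(target, maximum);
            printf("after page %2d: prefetch_target = %d\n", page, target);
        }
        return 0;                   /* prints 0, 1, 2, 4, ..., 64 */
    }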

Author: Melanie Plageman
Discussion: https://postgr.es/m/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
---
 src/backend/access/heap/heapam_handler.c  | 214 +++++++++++++++++++++
 src/backend/executor/nodeBitmapHeapscan.c | 218 +---------------------
 src/include/access/heapam.h               |   3 +
 3 files changed, 218 insertions(+), 217 deletions(-)

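The parallel path in BitmapPrefetch claims each page to prefetch with an
unlocked pre-check followed by a recheck under the spinlock.  A minimal
standalone sketch of that claim pattern (not PostgreSQL code; a pthread
mutex stands in for the ParallelBitmapHeapState spinlock, and all names
below are illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct SharedPrefetchState
    {
        pthread_mutex_t mutex;
        int         prefetch_pages;     /* pages already prefetched */
        int         prefetch_target;    /* how far ahead we may run */
    } SharedPrefetchState;

    /* Returns true if this worker claimed one page to prefetch. */
    static bool
    claim_one_prefetch(SharedPrefetchState *pstate)
    {
        bool        do_prefetch = false;

        /* Unlocked check first, to save lock acquisitions. */
        if (pstate->prefetch_pages >= pstate->prefetch_target)
            return false;

        /* Recheck under the mutex; another worker may have done enough. */
        pthread_mutex_lock(&pstate->mutex);
        if (pstate->prefetch_pages < pstate->prefetch_target)
        {
            pstate->prefetch_pages++;
            do_prefetch = true;
        }
        pthread_mutex_unlock(&pstate->mutex);

        return do_prefetch;
    }

    int
    main(void)
    {
        SharedPrefetchState pstate;

        pthread_mutex_init(&pstate.mutex, NULL);
        pstate.prefetch_pages = 0;
        pstate.prefetch_target = 4;

        while (claim_one_prefetch(&pstate))
            printf("claimed a page, prefetch_pages = %d\n",
                   pstate.prefetch_pages);

        pthread_mutex_destroy(&pstate.mutex);
        return 0;
    }
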
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 15fc58f4fb7..85564fef2e0 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2180,6 +2180,220 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 									   HEAP_USABLE_BYTES_PER_PAGE);
 }
 
+/*
+ *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
+ *
+ *	We keep track of how far the prefetch iterator is ahead of the main
+ *	iterator in prefetch_pages. For each block the main iterator returns, we
+ *	decrement prefetch_pages.
+ */
+void
+BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
+{
+#ifdef USE_PREFETCH
+	ParallelBitmapHeapState *pstate = node->pstate;
+	UnifiedTBMIterator *prefetch_iterator = &node->prefetch_iterator;
+	TBMIterateResult *tbmpre;
+
+	if (pstate == NULL)
+	{
+		if (node->prefetch_pages > 0)
+		{
+			/* The main iterator has closed the distance by one page */
+			node->prefetch_pages--;
+		}
+		else if (!prefetch_iterator->exhausted)
+		{
+			/* Do not let the prefetch iterator get behind the main one */
+			tbmpre = unified_tbm_iterate(prefetch_iterator);
+			node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
+		}
+		return;
+	}
+
+	/*
+	 * XXX: There is a known issue with keeping the prefetch and current block
+	 * iterators in sync for parallel bitmapheapscans. This can lead to
+	 * prefetching blocks that have already been read. See the discussion
+	 * here:
+	 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
+	 * Note that moving the call site of BitmapAdjustPrefetchIterator()
+	 * exacerbates the effects of this bug.
+	 */
+	if (node->prefetch_maximum > 0)
+	{
+		SpinLockAcquire(&pstate->mutex);
+		if (pstate->prefetch_pages > 0)
+		{
+			pstate->prefetch_pages--;
+			SpinLockRelease(&pstate->mutex);
+		}
+		else
+		{
+			/* Release the mutex before iterating */
+			SpinLockRelease(&pstate->mutex);
+
+			/*
+			 * In case of shared mode, we can not ensure that the current
+			 * blockno of the main iterator and that of the prefetch iterator
+			 * are same.  It's possible that whatever blockno we are
+			 * prefetching will be processed by another process.  Therefore,
+			 * we don't validate the blockno here as we do in non-parallel
+			 * case.
+			 */
+			if (!prefetch_iterator->exhausted)
+			{
+				tbmpre = unified_tbm_iterate(prefetch_iterator);
+				node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
+			}
+		}
+	}
+#endif							/* USE_PREFETCH */
+}
+
+/*
+ * BitmapAdjustPrefetchTarget - Adjust the prefetch target
+ *
+ * Increase prefetch target if it's not yet at the max.  Note that
+ * we will increase it to zero after fetching the very first
+ * page/tuple, then to one after the second tuple is fetched, then
+ * it doubles as later pages are fetched.
+ */
+void
+BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
+{
+#ifdef USE_PREFETCH
+	ParallelBitmapHeapState *pstate = node->pstate;
+
+	if (pstate == NULL)
+	{
+		if (node->prefetch_target >= node->prefetch_maximum)
+			 /* don't increase any further */ ;
+		else if (node->prefetch_target >= node->prefetch_maximum / 2)
+			node->prefetch_target = node->prefetch_maximum;
+		else if (node->prefetch_target > 0)
+			node->prefetch_target *= 2;
+		else
+			node->prefetch_target++;
+		return;
+	}
+
+	/* Do an unlocked check first to save spinlock acquisitions. */
+	if (pstate->prefetch_target < node->prefetch_maximum)
+	{
+		SpinLockAcquire(&pstate->mutex);
+		if (pstate->prefetch_target >= node->prefetch_maximum)
+			 /* don't increase any further */ ;
+		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
+			pstate->prefetch_target = node->prefetch_maximum;
+		else if (pstate->prefetch_target > 0)
+			pstate->prefetch_target *= 2;
+		else
+			pstate->prefetch_target++;
+		SpinLockRelease(&pstate->mutex);
+	}
+#endif							/* USE_PREFETCH */
+}
+
+/*
+ * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
+ */
+void
+BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
+{
+#ifdef USE_PREFETCH
+	ParallelBitmapHeapState *pstate = node->pstate;
+	UnifiedTBMIterator *prefetch_iterator = &node->prefetch_iterator;
+
+	if (pstate == NULL)
+	{
+		if (!prefetch_iterator->exhausted)
+		{
+			while (node->prefetch_pages < node->prefetch_target)
+			{
+				TBMIterateResult *tbmpre = unified_tbm_iterate(prefetch_iterator);
+				bool		skip_fetch;
+
+				if (tbmpre == NULL)
+				{
+					/* No more pages to prefetch */
+					unified_tbm_end_iterate(prefetch_iterator);
+					break;
+				}
+				node->prefetch_pages++;
+				node->pfblockno = tbmpre->blockno;
+
+				/*
+				 * If we expect not to have to actually read this heap page,
+				 * skip this prefetch call, but continue to run the prefetch
+				 * logic normally.  (Would it be better not to increment
+				 * prefetch_pages?)
+				 */
+				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
+							  !tbmpre->recheck &&
+							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+											 tbmpre->blockno,
+											 &node->pvmbuffer));
+
+				if (!skip_fetch)
+					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+			}
+		}
+
+		return;
+	}
+
+	if (pstate->prefetch_pages < pstate->prefetch_target)
+	{
+		if (!prefetch_iterator->exhausted)
+		{
+			while (1)
+			{
+				TBMIterateResult *tbmpre;
+				bool		do_prefetch = false;
+				bool		skip_fetch;
+
+				/*
+				 * Recheck under the mutex. If some other process has already
+				 * done enough prefetching then we need not to do anything.
+				 */
+				SpinLockAcquire(&pstate->mutex);
+				if (pstate->prefetch_pages < pstate->prefetch_target)
+				{
+					pstate->prefetch_pages++;
+					do_prefetch = true;
+				}
+				SpinLockRelease(&pstate->mutex);
+
+				if (!do_prefetch)
+					return;
+
+				tbmpre = unified_tbm_iterate(prefetch_iterator);
+				if (tbmpre == NULL)
+				{
+					/* No more pages to prefetch */
+					unified_tbm_end_iterate(prefetch_iterator);
+					break;
+				}
+
+				node->pfblockno = tbmpre->blockno;
+
+				/* As above, skip prefetch if we expect not to need page */
+				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
+							  !tbmpre->recheck &&
+							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
+											 tbmpre->blockno,
+											 &node->pvmbuffer));
+
+				if (!skip_fetch)
+					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+			}
+		}
+	}
+#endif							/* USE_PREFETCH */
+}
+
+
 
 /* ------------------------------------------------------------------------
  * Executor related callbacks for the heap AM
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 012cba2c125..fda5645e4ae 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -40,6 +40,7 @@
 #include "access/relscan.h"
 #include "access/tableam.h"
 #include "access/visibilitymap.h"
+#include "access/heapam.h"
 #include "executor/executor.h"
 #include "executor/nodeBitmapHeapscan.h"
 #include "miscadmin.h"
@@ -51,10 +52,6 @@
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
-static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
-static inline void BitmapPrefetch(BitmapHeapScanState *node,
-								  TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
 
 
@@ -281,219 +278,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 	ConditionVariableBroadcast(&pstate->cv);
 }
 
-/*
- *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
- *
- *	We keep track of how far the prefetch iterator is ahead of the main
- *	iterator in prefetch_pages. For each block the main iterator returns, we
- *	decrement prefetch_pages.
- */
-static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-	UnifiedTBMIterator *prefetch_iterator = &node->prefetch_iterator;
-	TBMIterateResult *tbmpre;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_pages > 0)
-		{
-			/* The main iterator has closed the distance by one page */
-			node->prefetch_pages--;
-		}
-		else if (!prefetch_iterator->exhausted)
-		{
-			/* Do not let the prefetch iterator get behind the main one */
-			tbmpre = unified_tbm_iterate(prefetch_iterator);
-			node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
-		}
-		return;
-	}
-
-	/*
-	 * XXX: There is a known issue with keeping the prefetch and current block
-	 * iterators in sync for parallel bitmapheapscans. This can lead to
-	 * prefetching blocks that have already been read. See the discussion
-	 * here:
-	 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
-	 * Note that moving the call site of BitmapAdjustPrefetchIterator()
-	 * exacerbates the effects of this bug.
-	 */
-	if (node->prefetch_maximum > 0)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_pages > 0)
-		{
-			pstate->prefetch_pages--;
-			SpinLockRelease(&pstate->mutex);
-		}
-		else
-		{
-			/* Release the mutex before iterating */
-			SpinLockRelease(&pstate->mutex);
-
-			/*
-			 * In case of shared mode, we can not ensure that the current
-			 * blockno of the main iterator and that of the prefetch iterator
-			 * are same.  It's possible that whatever blockno we are
-			 * prefetching will be processed by another process.  Therefore,
-			 * we don't validate the blockno here as we do in non-parallel
-			 * case.
-			 */
-			if (!prefetch_iterator->exhausted)
-			{
-				tbmpre = unified_tbm_iterate(prefetch_iterator);
-				node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber;
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapAdjustPrefetchTarget - Adjust the prefetch target
- *
- * Increase prefetch target if it's not yet at the max.  Note that
- * we will increase it to zero after fetching the very first
- * page/tuple, then to one after the second tuple is fetched, then
- * it doubles as later pages are fetched.
- */
-static inline void
-BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (node->prefetch_target >= node->prefetch_maximum / 2)
-			node->prefetch_target = node->prefetch_maximum;
-		else if (node->prefetch_target > 0)
-			node->prefetch_target *= 2;
-		else
-			node->prefetch_target++;
-		return;
-	}
-
-	/* Do an unlocked check first to save spinlock acquisitions. */
-	if (pstate->prefetch_target < node->prefetch_maximum)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
-			pstate->prefetch_target = node->prefetch_maximum;
-		else if (pstate->prefetch_target > 0)
-			pstate->prefetch_target *= 2;
-		else
-			pstate->prefetch_target++;
-		SpinLockRelease(&pstate->mutex);
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
- */
-static inline void
-BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-	UnifiedTBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-	if (pstate == NULL)
-	{
-		if (!prefetch_iterator->exhausted)
-		{
-			while (node->prefetch_pages < node->prefetch_target)
-			{
-				TBMIterateResult *tbmpre = unified_tbm_iterate(prefetch_iterator);
-				bool		skip_fetch;
-
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					unified_tbm_end_iterate(prefetch_iterator);
-					break;
-				}
-				node->prefetch_pages++;
-				node->pfblockno = tbmpre->blockno;
-
-				/*
-				 * If we expect not to have to actually read this heap page,
-				 * skip this prefetch call, but continue to run the prefetch
-				 * logic normally.  (Would it be better not to increment
-				 * prefetch_pages?)
-				 */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-
-		return;
-	}
-
-	if (pstate->prefetch_pages < pstate->prefetch_target)
-	{
-		if (!prefetch_iterator->exhausted)
-		{
-			while (1)
-			{
-				TBMIterateResult *tbmpre;
-				bool		do_prefetch = false;
-				bool		skip_fetch;
-
-				/*
-				 * Recheck under the mutex. If some other process has already
-				 * done enough prefetching then we need not to do anything.
-				 */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_pages < pstate->prefetch_target)
-				{
-					pstate->prefetch_pages++;
-					do_prefetch = true;
-				}
-				SpinLockRelease(&pstate->mutex);
-
-				if (!do_prefetch)
-					return;
-
-				tbmpre = unified_tbm_iterate(prefetch_iterator);
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					unified_tbm_end_iterate(prefetch_iterator);
-					break;
-				}
-
-				node->pfblockno = tbmpre->blockno;
-
-				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
 /*
  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
  */
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 853a042c418..f0cd0041460 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -422,6 +422,9 @@ extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
 										   TransactionId OldestXmin,
 										   double *liverows, double *deadrows,
 										   TupleTableSlot *slot);
+extern void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
+extern void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
+extern void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan);
 
 /*
  * To avoid leaking too much knowledge about reorderbuffer implementation
-- 
2.40.1

