From 2c200ceb43899301fd0a6ad079aa9d4d48c24afb Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 7 Jul 2025 17:30:14 -0400
Subject: [PATCH v3 12/13] Allow on-access pruning to set pages all-visible

Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum marked pages as all-visible or all-frozen.

Supporting this requires passing information from the executor down to
the scan descriptor about whether the query modifies the relation.

This commit implements on-access VM setting for sequential scans as well
as for the underlying heap relation in index scans and bitmap heap
scans.
---
 src/backend/access/heap/heapam.c          | 15 +++++-
 src/backend/access/heap/heapam_handler.c  | 17 ++++++-
 src/backend/access/heap/pruneheap.c       | 59 +++++++++++++++++------
 src/backend/access/index/indexam.c        | 46 ++++++++++++++++++
 src/backend/access/table/tableam.c        | 39 +++++++++++++--
 src/backend/executor/execMain.c           |  4 ++
 src/backend/executor/execUtils.c          |  2 +
 src/backend/executor/nodeBitmapHeapscan.c |  6 ++-
 src/backend/executor/nodeIndexscan.c      | 17 ++++---
 src/backend/executor/nodeSeqscan.c        | 17 +++++--
 src/backend/storage/ipc/procarray.c       | 12 +++++
 src/include/access/genam.h                | 11 +++++
 src/include/access/heapam.h               | 24 +++++++--
 src/include/access/relscan.h              |  6 +++
 src/include/access/tableam.h              | 30 +++++++++++-
 src/include/nodes/execnodes.h             | 17 +++++++
 src/include/utils/snapmgr.h               |  1 +
 17 files changed, 285 insertions(+), 38 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 48f7b84156a..50b0d169d54 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -560,6 +560,7 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	int			lines;
 	bool		all_visible;
 	bool		check_serializable;
+	bool		allow_vmset;
 
 	Assert(BufferGetBlockNumber(buffer) == block);
 
@@ -570,7 +571,9 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
+	allow_vmset = sscan->rs_flags & SO_ALLOW_VM_SET;
+	heap_page_prune_opt(scan->rs_base.rs_rd, buffer,
+						allow_vmset ? &scan->rs_vmbuffer : NULL, allow_vmset);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
@@ -1236,6 +1239,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 														  sizeof(TBMIterateResult));
 	}
 
+	scan->rs_vmbuffer = InvalidBuffer;
 
 	return (TableScanDesc) scan;
 }
@@ -1274,6 +1278,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 		scan->rs_cbuf = InvalidBuffer;
 	}
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+	{
+		ReleaseBuffer(scan->rs_vmbuffer);
+		scan->rs_vmbuffer = InvalidBuffer;
+	}
+
 	/*
 	 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
 	 * additional data vs a normal HeapScan
@@ -1306,6 +1316,9 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+		ReleaseBuffer(scan->rs_vmbuffer);
+
 	/*
 	 * Must free the read stream before freeing the BufferAccessStrategy.
 	 */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index cb4bc35c93e..fb450c5a84f 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -85,6 +85,7 @@ heapam_index_fetch_begin(Relation rel)
 
 	hscan->xs_base.rel = rel;
 	hscan->xs_cbuf = InvalidBuffer;
+	hscan->xs_vmbuffer = InvalidBuffer;
 
 	return &hscan->xs_base;
 }
@@ -99,6 +100,12 @@ heapam_index_fetch_reset(IndexFetchTableData *scan)
 		ReleaseBuffer(hscan->xs_cbuf);
 		hscan->xs_cbuf = InvalidBuffer;
 	}
+
+	if (BufferIsValid(hscan->xs_vmbuffer))
+	{
+		ReleaseBuffer(hscan->xs_vmbuffer);
+		hscan->xs_vmbuffer = InvalidBuffer;
+	}
 }
 
 static void
@@ -138,7 +145,9 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 		 * Prune page, but only if we weren't already on this page
 		 */
 		if (prev_buf != hscan->xs_cbuf)
-			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
+								scan->modifies_base_rel ? NULL : &hscan->xs_vmbuffer,
+								!scan->modifies_base_rel);
 	}
 
 	/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2471,6 +2480,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	TBMIterateResult *tbmres;
 	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
 	int			noffsets = -1;
+	bool		allow_vmset = false;
 
 	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
 	Assert(hscan->rs_read_stream);
@@ -2517,7 +2527,10 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_rd, buffer);
+	allow_vmset = scan->rs_flags & SO_ALLOW_VM_SET;
+	heap_page_prune_opt(scan->rs_rd, buffer,
+						allow_vmset ? &hscan->rs_vmbuffer : NULL,
+						allow_vmset);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index f6509695e3a..af23008ddf7 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -158,6 +158,7 @@ typedef struct
 	bool		all_visible;
 	bool		all_frozen;
 	TransactionId visibility_cutoff_xid;
+	TransactionId oldest_xmin;
 } PruneState;
 
 /* Local functions */
@@ -203,9 +204,13 @@ static bool identify_and_fix_vm_corruption(Relation relation,
  * if there's not any use in pruning.
  *
  * Caller must have pin on the buffer, and must *not* have a lock on it.
+ *
+ * If allow_vmset is true, it is okay for pruning to set the visibility map if
+ * the page is all visible.
  */
 void
-heap_page_prune_opt(Relation relation, Buffer buffer)
+heap_page_prune_opt(Relation relation, Buffer buffer,
+					Buffer *vmbuffer, bool allow_vmset)
 {
 	Page		page = BufferGetPage(buffer);
 	TransactionId prune_xid;
@@ -260,6 +265,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 		if (!ConditionalLockBufferForCleanup(buffer))
 			return;
 
+		/* Caller should not pass a vmbuffer if allow_vmset is false. */
+		Assert(allow_vmset || vmbuffer == NULL);
+
 		/*
 		 * Now that we have buffer lock, get accurate information about the
 		 * page's free space, and recheck the heuristic about whether to
@@ -269,6 +277,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 		{
 			OffsetNumber dummy_off_loc;
 			PruneFreezeResult presult;
+			int			options = 0;
+
+			if (allow_vmset)
+			{
+				visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+				options = HEAP_PAGE_PRUNE_UPDATE_VM;
+			}
 
 			/*
 			 * For now, pass mark_unused_now as false regardless of whether or
@@ -276,8 +291,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 			 * that during on-access pruning with the current implementation.
 			 */
 			heap_page_prune_and_freeze(relation, buffer, false,
-									   InvalidBuffer,
-									   vistest, 0,
+									   vmbuffer ? *vmbuffer : InvalidBuffer,
+									   vistest, options,
 									   NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
 
 			/*
@@ -467,6 +482,10 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
 	prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
 	prstate.update_vm = (options & HEAP_PAGE_PRUNE_UPDATE_VM) != 0;
+	if (cutoffs)
+		prstate.oldest_xmin = cutoffs->OldestXmin;
+	else
+		prstate.oldest_xmin = OldestXminFromGlobalVisState(vistest);
 	prstate.cutoffs = cutoffs;
 
 	/*
@@ -877,6 +896,20 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	 */
 	if (prstate.update_vm)
 	{
+		/*
+		 * If this is on-access pruning and we aren't actually pruning or
+		 * freezing, skip setting the VM when doing so would newly dirty the
+		 * heap page or, if the page is already dirty, would force an FPI of
+		 * the heap page into the WAL record. This should rarely happen, as
+		 * we only attempt on-access pruning when pd_prune_xid is valid.
+		 */
+		if (reason == PRUNE_ON_ACCESS &&
+			!do_prune && !do_freeze &&
+			(!BufferIsDirty(buffer) || XLogCheckBufferNeedsBackup(buffer)))
+		{
+			/* Don't update the VM */
+		}
+
 		/*
 		 * Clear any VM corruption. This does not need to be in a critical
 		 * section, so we do it first. If PD_ALL_VISIBLE is incorrectly set,
@@ -885,9 +918,9 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		 * of VM corruption, so we don't have to worry about the extra
 		 * performance overhead.
 		 */
-		if (identify_and_fix_vm_corruption(relation,
-										   blockno, buffer, page,
-										   blk_known_av, prstate.lpdead_items, vmbuffer))
+		else if (identify_and_fix_vm_corruption(relation,
+												blockno, buffer, page,
+												blk_known_av, prstate.lpdead_items, vmbuffer))
 		{
 			/* If we fix corruption, don't update the VM further */
 		}
@@ -1013,7 +1046,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 			 */
 			else if (do_freeze)
 			{
-				conflict_xid = prstate.cutoffs->OldestXmin;
+				conflict_xid = prstate.oldest_xmin;
 				TransactionIdRetreat(conflict_xid);
 			}
 
@@ -1071,12 +1104,10 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		TransactionId debug_cutoff;
 		bool		debug_all_frozen;
 
-		Assert(cutoffs);
-
 		Assert(prstate.lpdead_items == 0);
 
 		if (!heap_page_is_all_visible(relation, buffer,
-									  cutoffs->OldestXmin,
+									  prstate.oldest_xmin,
 									  &debug_all_frozen,
 									  &debug_cutoff, off_loc))
 			Assert(false);
@@ -1136,9 +1167,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 	 * vacuuming the relation. OldestXmin is used for freezing determination
 	 * and we cannot freeze dead tuples' xmaxes.
 	 */
-	if (prstate->cutoffs &&
-		TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
-		NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
+	if (TransactionIdIsValid(prstate->oldest_xmin) &&
+		NormalTransactionIdPrecedes(dead_after, prstate->oldest_xmin))
 		return HEAPTUPLE_DEAD;
 
 	/*
@@ -1607,8 +1637,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb
 				 * could use GlobalVisTestIsRemovableXid instead, if a
 				 * non-freezing caller wanted to set the VM bit.
 				 */
-				Assert(prstate->cutoffs);
-				if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
+				if (!TransactionIdPrecedes(xmin, prstate->oldest_xmin))
 				{
 					prstate->all_visible = false;
 					break;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 219df1971da..d803c307517 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -279,6 +279,32 @@ index_beginscan(Relation heapRelation,
 	return scan;
 }
 
+/*
+ * Similar to index_beginscan(), but allows the caller to indicate whether the
+ * query modifies the underlying base relation. This is used when the caller
+ * wants to attempt marking pages in the base relation as all-visible in the
+ * visibility map during on-access pruning.
+ */
+IndexScanDesc
+index_beginscan_vmset(Relation heapRelation,
+					  Relation indexRelation,
+					  Snapshot snapshot,
+					  IndexScanInstrumentation *instrument,
+					  int nkeys, int norderbys, bool modifies_base_rel)
+{
+	IndexScanDesc scan;
+
+	scan = index_beginscan(heapRelation,
+						   indexRelation,
+						   snapshot,
+						   instrument,
+						   nkeys, norderbys);
+
+	scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
+
+	return scan;
+}
+
 /*
  * index_beginscan_bitmap - start a scan of an index with amgetbitmap
  *
@@ -610,6 +636,26 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel,
 	return scan;
 }
 
+/*
+ * Parallel version of index_beginscan_vmset()
+ */
+IndexScanDesc
+index_beginscan_parallel_vmset(Relation heaprel, Relation indexrel,
+							   IndexScanInstrumentation *instrument,
+							   int nkeys, int norderbys,
+							   ParallelIndexScanDesc pscan,
+							   bool modifies_base_rel)
+{
+	IndexScanDesc scan;
+
+	scan = index_beginscan_parallel(heaprel, indexrel,
+									instrument,
+									nkeys, norderbys,
+									pscan);
+	scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
+	return scan;
+}
+
 /* ----------------
  * index_getnext_tid - get the next TID from a scan
  *
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index a56c5eceb14..67dbf99f5b5 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -49,6 +49,10 @@
 char	   *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
 bool		synchronize_seqscans = true;
 
+/* Helper for table_beginscan_parallel() and table_beginscan_parallel_vmset() */
+static TableScanDesc table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
+													 uint32 flags);
+
 
 /* ----------------------------------------------------------------------------
  * Slot functions.
@@ -162,12 +166,14 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
 	}
 }
 
-TableScanDesc
-table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
+/*
+ * Common helper for table_beginscan_parallel() and table_beginscan_parallel_vmset()
+ */
+static TableScanDesc
+table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
+								uint32 flags)
 {
 	Snapshot	snapshot;
-	uint32		flags = SO_TYPE_SEQSCAN |
-		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
 
 	Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
 
@@ -188,6 +194,31 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
 											pscan, flags);
 }
 
+TableScanDesc
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	return table_beginscan_parallel_common(relation, pscan, flags);
+}
+
+/*
+ * Parallel version of table_beginscan_vmset()
+ */
+TableScanDesc
+table_beginscan_parallel_vmset(Relation relation, ParallelTableScanDesc pscan,
+							   bool modifies_rel)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
+	return table_beginscan_parallel_common(relation, pscan, flags);
+}
+
 
 /* ----------------------------------------------------------------------------
  * Index scan related functions.
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 0391798dd2c..065676eb7cf 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -917,6 +917,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 					break;
 			}
 
+			/* Treat any relation with a rowmark as potentially modified */
+			estate->es_modified_relids = bms_add_member(estate->es_modified_relids,
+														rc->rti);
+
 			/* Check that relation is a legal target for marking */
 			if (relation)
 				CheckValidRowMarkRel(relation, rc->markType);
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index fdc65c2b42b..28a06dcd244 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -893,6 +893,8 @@ ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo,
 		estate->es_result_relations = (ResultRelInfo **)
 			palloc0(estate->es_range_table_size * sizeof(ResultRelInfo *));
 	estate->es_result_relations[rti - 1] = resultRelInfo;
+	estate->es_modified_relids = bms_add_member(estate->es_modified_relids,
+												rti);
 
 	/*
 	 * Saving in the list allows to avoid needlessly traversing the whole
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index bf24f3d7fe0..2c57bc7ac49 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -109,7 +109,8 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 			table_beginscan_bm(node->ss.ss_currentRelation,
 							   node->ss.ps.state->es_snapshot,
 							   0,
-							   NULL);
+							   NULL,
+							   node->modifies_rel);
 	}
 
 	node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
@@ -360,6 +361,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->initialized = false;
 	scanstate->pstate = NULL;
 	scanstate->recheck = true;
+	scanstate->modifies_rel =
+		bms_is_member(node->scan.scanrelid,
+					  estate->es_modified_relids);
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 7fcaa37fe62..f91c6b17620 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -106,12 +106,13 @@ IndexNext(IndexScanState *node)
 		 * We reach here if the index scan is not parallel, or if we're
 		 * serially executing an index scan that was planned to be parallel.
 		 */
-		scandesc = index_beginscan(node->ss.ss_currentRelation,
-								   node->iss_RelationDesc,
-								   estate->es_snapshot,
-								   &node->iss_Instrument,
-								   node->iss_NumScanKeys,
-								   node->iss_NumOrderByKeys);
+		scandesc = index_beginscan_vmset(node->ss.ss_currentRelation,
+										 node->iss_RelationDesc,
+										 estate->es_snapshot,
+										 &node->iss_Instrument,
+										 node->iss_NumScanKeys,
+										 node->iss_NumOrderByKeys,
+										 node->iss_ModifiesBaseRel);
 
 		node->iss_ScanDesc = scandesc;
 
@@ -935,6 +936,10 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 	indexstate->ss.ss_currentRelation = currentRelation;
 	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */
 
+	indexstate->iss_ModifiesBaseRel =
+		bms_is_member(node->scan.scanrelid,
+					  estate->es_modified_relids);
+
 	/*
 	 * get the scan type from the relation descriptor.
 	 */
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index ed35c58c2c3..cded7f15703 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -69,9 +69,9 @@ SeqNext(SeqScanState *node)
 		 * We reach here if the scan is not parallel, or if we're serially
 		 * executing a scan that was planned to be parallel.
 		 */
-		scandesc = table_beginscan(node->ss.ss_currentRelation,
-								   estate->es_snapshot,
-								   0, NULL);
+		scandesc = table_beginscan_vmset(node->ss.ss_currentRelation,
+										 estate->es_snapshot,
+										 0, NULL, node->modifies_rel);
 		node->ss.ss_currentScanDesc = scandesc;
 	}
 
@@ -237,6 +237,10 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 							 node->scan.scanrelid,
 							 eflags);
 
+	scanstate->modifies_rel =
+		bms_is_member(node->scan.scanrelid,
+					  estate->es_modified_relids);
+
 	/* and create slot with the appropriate rowtype */
 	ExecInitScanTupleSlot(estate, &scanstate->ss,
 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
@@ -370,7 +374,8 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
 								  estate->es_snapshot);
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel_vmset(node->ss.ss_currentRelation, pscan,
+									   node->modifies_rel);
 }
 
 /* ----------------------------------------------------------------
@@ -403,5 +408,7 @@ ExecSeqScanInitializeWorker(SeqScanState *node,
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel_vmset(node->ss.ss_currentRelation,
+									   pscan,
+									   node->modifies_rel);
 }
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e5b945a9ee3..01d2bda3f72 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -4133,6 +4133,18 @@ GlobalVisTestFor(Relation rel)
 	return state;
 }
 
+/*
+ * Returns the GlobalVisState's maybe_needed horizon truncated to a 32-bit
+ * TransactionId. Useful for callers that need to compare transaction IDs
+ * to a single value and can accept this more conservative boundary.
+ */
+TransactionId
+OldestXminFromGlobalVisState(GlobalVisState *state)
+{
+	return XidFromFullTransactionId(state->maybe_needed);
+}
+
+
 /*
  * Return true if it's worth updating the accurate maybe_needed boundary.
  *
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 5b2ab181b5f..bf272c2c37f 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -180,6 +180,11 @@ extern IndexScanDesc index_beginscan(Relation heapRelation,
 									 Snapshot snapshot,
 									 IndexScanInstrumentation *instrument,
 									 int nkeys, int norderbys);
+extern IndexScanDesc index_beginscan_vmset(Relation heapRelation,
+										   Relation indexRelation,
+										   Snapshot snapshot,
+										   IndexScanInstrumentation *instrument,
+										   int nkeys, int norderbys, bool modifies_heap_rel);
 extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
 											Snapshot snapshot,
 											IndexScanInstrumentation *instrument,
@@ -206,6 +211,12 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
 											  IndexScanInstrumentation *instrument,
 											  int nkeys, int norderbys,
 											  ParallelIndexScanDesc pscan);
+
+extern IndexScanDesc index_beginscan_parallel_vmset(Relation heaprel, Relation indexrel,
+													IndexScanInstrumentation *instrument,
+													int nkeys, int norderbys,
+													ParallelIndexScanDesc pscan,
+													bool modifies_rel);
 extern ItemPointer index_getnext_tid(IndexScanDesc scan,
 									 ScanDirection direction);
 struct TupleTableSlot;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 0b9bb1c9b13..46ea8b8455c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -94,6 +94,13 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+	/*
+	 * For sequential scans and bitmap heap scans. If the relation is not
+	 * being modified, on-access pruning may read in the current heap page's
+	 * corresponding VM block to this buffer.
+	 */
+	Buffer		rs_vmbuffer;
+
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
 	uint32		rs_cindex;		/* current tuple's index in vistuples */
 	uint32		rs_ntuples;		/* number of visible tuples on page */
@@ -116,8 +123,18 @@ typedef struct IndexFetchHeapData
 {
 	IndexFetchTableData xs_base;	/* AM independent part of the descriptor */
 
-	Buffer		xs_cbuf;		/* current heap buffer in scan, if any */
-	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+	/*
+	 * Current heap buffer in scan, if any. NB: if xs_cbuf is not
+	 * InvalidBuffer, we hold a pin on that buffer.
+	 */
+	Buffer		xs_cbuf;
+
+	/*
+	 * For index scans that do not modify the underlying heap table, on-access
+	 * pruning may read in the current heap page's corresponding VM block to
+	 * this buffer.
+	 */
+	Buffer		xs_vmbuffer;
 } IndexFetchHeapData;
 
 /* Result codes for HeapTupleSatisfiesVacuum */
@@ -374,7 +391,8 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 
 /* in heap/pruneheap.c */
 struct GlobalVisState;
-extern void heap_page_prune_opt(Relation relation, Buffer buffer);
+extern void heap_page_prune_opt(Relation relation, Buffer buffer,
+								Buffer *vmbuffer, bool allow_vmset);
 extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 									   bool blk_known_av,
 									   Buffer vmbuffer,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index b5e0fb386c0..f496e0b4939 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -121,6 +121,12 @@ typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker;
 typedef struct IndexFetchTableData
 {
 	Relation	rel;
+
+	/*
+	 * Some optimizations can only be performed if the query does not modify
+	 * the underlying relation. Track that here.
+	 */
+	bool		modifies_base_rel;
 } IndexFetchTableData;
 
 struct IndexScanInstrumentation;
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 1c9e802a6b1..0e986d8ef72 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -62,6 +62,8 @@ typedef enum ScanOptions
 
 	/* unregister snapshot at scan end? */
 	SO_TEMP_SNAPSHOT = 1 << 9,
+	/* whether or not scan should attempt to set the VM */
+	SO_ALLOW_VM_SET = 1 << 10,
 }			ScanOptions;
 
 /*
@@ -876,6 +878,25 @@ table_beginscan(Relation rel, Snapshot snapshot,
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
+/*
+ * Similar to table_beginscan(), but allows the caller to indicate whether the
+ * query modifies the relation. This is used when the caller wants to attempt
+ * marking pages in the relation as all-visible in the visibility map during
+ * on-access pruning.
+ */
+static inline TableScanDesc
+table_beginscan_vmset(Relation rel, Snapshot snapshot,
+					  int nkeys, struct ScanKeyData *key, bool modifies_rel)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+}
+
 /*
  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
  * snapshot appropriate for scanning catalog relations.
@@ -913,10 +934,13 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
-				   int nkeys, struct ScanKeyData *key)
+				   int nkeys, struct ScanKeyData *key, bool modifies_rel)
 {
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
 									   NULL, flags);
 }
@@ -1125,6 +1149,10 @@ extern void table_parallelscan_initialize(Relation rel,
 extern TableScanDesc table_beginscan_parallel(Relation relation,
 											  ParallelTableScanDesc pscan);
 
+extern TableScanDesc table_beginscan_parallel_vmset(Relation relation,
+													ParallelTableScanDesc pscan,
+													bool modifies_rel);
+
 /*
  * Restart a parallel scan.  Call this in the leader process.  Caller is
  * responsible for making sure that all workers have finished the scan
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e107d6e5f81..1d0b374b652 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -680,6 +680,12 @@ typedef struct EState
 									 * ExecDoInitialPruning() */
 	const char *es_sourceText;	/* Source text from QueryDesc */
 
+	/*
+	 * RT indexes of relations modified by the query either through
+	 * UPDATE/DELETE/INSERT/MERGE or SELECT FOR UPDATE
+	 */
+	Bitmapset  *es_modified_relids;
+
 	JunkFilter *es_junkFilter;	/* top-level junk filter, if any */
 
 	/* If query can insert/delete tuples, the command ID to mark them with */
@@ -1631,6 +1637,13 @@ typedef struct SeqScanState
 {
 	ScanState	ss;				/* its first field is NodeTag */
 	Size		pscan_len;		/* size of parallel heap scan descriptor */
+
+	/*
+	 * Whether or not the query modifies the relation scanned by this node.
+	 * This is used to avoid the overhead of optimizations that are only
+	 * effective for tables not modified by the query.
+	 */
+	bool		modifies_rel;
 } SeqScanState;
 
 /* ----------------
@@ -1702,6 +1715,7 @@ typedef struct
  *		OrderByTypByVals   is the datatype of order by expression pass-by-value?
  *		OrderByTypLens	   typlens of the datatypes of order by expressions
  *		PscanLen		   size of parallel index scan descriptor
+ *		ModifiesBaseRel    true if query modifies base relation
  * ----------------
  */
 typedef struct IndexScanState
@@ -1731,6 +1745,7 @@ typedef struct IndexScanState
 	bool	   *iss_OrderByTypByVals;
 	int16	   *iss_OrderByTypLens;
 	Size		iss_PscanLen;
+	bool		iss_ModifiesBaseRel;
 } IndexScanState;
 
 /* ----------------
@@ -1888,6 +1903,7 @@ typedef struct SharedBitmapHeapInstrumentation
  *		pstate			   shared state for parallel bitmap scan
  *		sinstrument		   statistics for parallel workers
  *		recheck			   do current page's tuples need recheck
+ *		modifies_rel	   does the query modify the base relation
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1900,6 +1916,7 @@ typedef struct BitmapHeapScanState
 	ParallelBitmapHeapState *pstate;
 	SharedBitmapHeapInstrumentation *sinstrument;
 	bool		recheck;
+	bool		modifies_rel;
 } BitmapHeapScanState;
 
 /* ----------------
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index d346be71642..fcb10b8d136 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -101,6 +101,7 @@ extern bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid
 extern bool GlobalVisTestIsRemovableFullXid(GlobalVisState *state, FullTransactionId fxid);
 extern bool GlobalVisCheckRemovableXid(Relation rel, TransactionId xid);
 extern bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid);
+extern TransactionId OldestXminFromGlobalVisState(GlobalVisState *state);
 
 /*
  * Utility functions for implementing visibility routines in table AMs.
-- 
2.43.0

