From 17dabfb6dade53ab1a73272edc383ed482989329 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 29 Jul 2025 14:34:30 -0400
Subject: [PATCH v6 17/20] Allow on-access pruning to set pages all-visible

Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum marked pages as all-visible or all-frozen.

Supporting this requires the executor to pass information about whether
the query modifies the relation down into the scan descriptor.

This commit implements on-access VM setting for sequential scans as well
as for the underlying heap relation in index scans and bitmap heap
scans.
---
 src/backend/access/heap/heapam.c              | 15 ++++-
 src/backend/access/heap/heapam_handler.c      | 15 ++++-
 src/backend/access/heap/pruneheap.c           | 63 ++++++++++++++-----
 src/backend/access/index/indexam.c            | 46 ++++++++++++++
 src/backend/access/table/tableam.c            | 39 ++++++++++--
 src/backend/executor/execMain.c               |  4 ++
 src/backend/executor/execUtils.c              |  2 +
 src/backend/executor/nodeBitmapHeapscan.c     |  7 ++-
 src/backend/executor/nodeIndexscan.c          | 18 ++++--
 src/backend/executor/nodeSeqscan.c            | 24 +++++--
 src/include/access/genam.h                    | 11 ++++
 src/include/access/heapam.h                   | 24 ++++++-
 src/include/access/relscan.h                  |  6 ++
 src/include/access/tableam.h                  | 30 ++++++++-
 src/include/nodes/execnodes.h                 |  6 ++
 .../t/035_standby_logical_decoding.pl         |  8 ++-
 16 files changed, 278 insertions(+), 40 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 48f7b84156a..f90b014a9b0 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -556,6 +556,7 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	Buffer		buffer = scan->rs_cbuf;
 	BlockNumber block = scan->rs_cblock;
 	Snapshot	snapshot;
+	Buffer	   *vmbuffer = NULL;
 	Page		page;
 	int			lines;
 	bool		all_visible;
@@ -570,7 +571,9 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
+	if (sscan->rs_flags & SO_ALLOW_VM_SET)
+		vmbuffer = &scan->rs_vmbuffer;
+	heap_page_prune_opt(scan->rs_base.rs_rd, buffer, vmbuffer);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
@@ -1236,6 +1239,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 														  sizeof(TBMIterateResult));
 	}
 
+	scan->rs_vmbuffer = InvalidBuffer;
 
 	return (TableScanDesc) scan;
 }
@@ -1274,6 +1278,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 		scan->rs_cbuf = InvalidBuffer;
 	}
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+	{
+		ReleaseBuffer(scan->rs_vmbuffer);
+		scan->rs_vmbuffer = InvalidBuffer;
+	}
+
 	/*
 	 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
 	 * additional data vs a normal HeapScan
@@ -1306,6 +1316,9 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+		ReleaseBuffer(scan->rs_vmbuffer);
+
 	/*
 	 * Must free the read stream before freeing the BufferAccessStrategy.
 	 */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index cb4bc35c93e..c68283de6f2 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -85,6 +85,7 @@ heapam_index_fetch_begin(Relation rel)
 
 	hscan->xs_base.rel = rel;
 	hscan->xs_cbuf = InvalidBuffer;
+	hscan->xs_vmbuffer = InvalidBuffer;
 
 	return &hscan->xs_base;
 }
@@ -99,6 +100,12 @@ heapam_index_fetch_reset(IndexFetchTableData *scan)
 		ReleaseBuffer(hscan->xs_cbuf);
 		hscan->xs_cbuf = InvalidBuffer;
 	}
+
+	if (BufferIsValid(hscan->xs_vmbuffer))
+	{
+		ReleaseBuffer(hscan->xs_vmbuffer);
+		hscan->xs_vmbuffer = InvalidBuffer;
+	}
 }
 
 static void
@@ -138,7 +145,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 		 * Prune page, but only if we weren't already on this page
 		 */
 		if (prev_buf != hscan->xs_cbuf)
-			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
+								scan->modifies_base_rel ? NULL : &hscan->xs_vmbuffer);
 	}
 
 	/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2471,6 +2479,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	TBMIterateResult *tbmres;
 	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
 	int			noffsets = -1;
+	Buffer	   *vmbuffer = NULL;
 
 	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
 	Assert(hscan->rs_read_stream);
@@ -2517,7 +2526,9 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_rd, buffer);
+	if (scan->rs_flags & SO_ALLOW_VM_SET)
+		vmbuffer = &hscan->rs_vmbuffer;
+	heap_page_prune_opt(scan->rs_rd, buffer, vmbuffer);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 80d055e5376..dad341cb265 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -198,9 +198,13 @@ static bool identify_and_fix_vm_corruption(Relation relation,
  * if there's not any use in pruning.
  *
  * Caller must have pin on the buffer, and must *not* have a lock on it.
+ *
+ * If vmbuffer is not NULL, the caller permits pruning to mark the page
+ * all-visible in the visibility map. This function handles pinning and, if
+ * necessary, reading in the required visibility map page.
  */
 void
-heap_page_prune_opt(Relation relation, Buffer buffer)
+heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
 {
 	Page		page = BufferGetPage(buffer);
 	TransactionId prune_xid;
@@ -264,6 +268,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 		{
 			OffsetNumber dummy_off_loc;
 			PruneFreezeResult presult;
+			int			options = 0;
+
+			if (vmbuffer)
+			{
+				visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+				options = HEAP_PAGE_PRUNE_UPDATE_VM;
+			}
 
 			/*
 			 * For now, pass mark_unused_now as false regardless of whether or
@@ -271,9 +282,10 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 			 * that during on-access pruning with the current implementation.
 			 */
 			heap_page_prune_and_freeze(relation, buffer, false,
-									   InvalidBuffer,
-									   vistest, 0,
-									   NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
+									   vmbuffer ? *vmbuffer : InvalidBuffer,
+									   vistest, options,
+									   NULL, &presult, PRUNE_ON_ACCESS,
+									   &dummy_off_loc, NULL, NULL);
 
 			/*
 			 * Report the number of tuples reclaimed to pgstats.  This is
@@ -513,12 +525,13 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	 * all-frozen for use in opportunistic freezing and to update the VM if
 	 * the caller requests it.
 	 *
-	 * Currently, only VACUUM attempts freezing and setting the VM bits. But
-	 * other callers could do either one. The visibility bookkeeping is
-	 * required for opportunistic freezing (in addition to setting the VM
-	 * bits) because we only consider opportunistically freezing tuples if the
-	 * whole page would become all-frozen or if the whole page will be frozen
-	 * except for dead tuples that will be removed by vacuum.
+	 * Currently, only VACUUM attempts freezing. But other callers could. The
+	 * visibility bookkeeping is required for opportunistic freezing (in
+	 * addition to setting the VM bits) because we only consider
+	 * opportunistically freezing tuples if the whole page would become
+	 * all-frozen or if the whole page will be frozen except for dead tuples
+	 * that will be removed by vacuum. But if consider_update_vm is false,
+	 * we'll not set the VM even if the page is discovered to be all-visible.
 	 *
 	 * If only updating the VM, we must initialize all_frozen to false, as
 	 * heap_prepare_freeze_tuple() will not be called for each tuple on the
@@ -530,7 +547,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	 * whether or not to freeze but before deciding whether or not to update
 	 * the VM so that we don't set the VM bit incorrectly.
 	 *
-	 * If not freezing or updating the VM, we otherwise avoid the extra
+	 * If not freezing and not updating the VM, we avoid the extra
 	 * bookkeeping. Initializing all_visible to false allows skipping the work
 	 * to update them in heap_prune_record_unchanged_lp_normal().
 	 */
@@ -879,12 +896,30 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		prstate.all_frozen = false;
 	}
 
+	/*
+	 * If this is an on-access call and we're not actually pruning, avoid
+	 * setting the visibility map if it would newly dirty the heap page or, if
+	 * the page is already dirty, if doing so would require including a
+	 * full-page image (FPI) of the heap page in the WAL. This situation
+	 * should be rare, as on-access pruning is only attempted when
+	 * pd_prune_xid is valid.
+	 */
+	if (reason == PRUNE_ON_ACCESS &&
+		prstate.consider_update_vm &&
+		prstate.all_visible &&
+		!do_prune && !do_freeze &&
+		(!BufferIsDirty(buffer) || XLogCheckBufferNeedsBackup(buffer)))
+	{
+		prstate.consider_update_vm = false;
+		prstate.all_visible = prstate.all_frozen = false;
+	}
+
 	Assert(!prstate.all_frozen || prstate.all_visible);
 
 	/*
-	 * Handle setting visibility map bit based on information from the VM (as
-	 * of last heap_vac_scan_next_block() call), and from all_visible and
-	 * all_frozen variables.
+	 * Handle setting visibility map bit based on information from the VM (if
+	 * provided, e.g. by vacuum from the last heap_vac_scan_next_block()
+	 * call), and from all_visible and all_frozen variables.
 	 */
 	if (prstate.consider_update_vm)
 	{
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 219df1971da..d803c307517 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -279,6 +279,32 @@ index_beginscan(Relation heapRelation,
 	return scan;
 }
 
+/*
+ * Similar to index_beginscan(), but allows the caller to indicate whether the
+ * query modifies the underlying base relation. This is used when the caller
+ * wants to attempt marking pages in the base relation as all-visible in the
+ * visibility map during on-access pruning.
+ */
+IndexScanDesc
+index_beginscan_vmset(Relation heapRelation,
+					  Relation indexRelation,
+					  Snapshot snapshot,
+					  IndexScanInstrumentation *instrument,
+					  int nkeys, int norderbys, bool modifies_base_rel)
+{
+	IndexScanDesc scan;
+
+	scan = index_beginscan(heapRelation,
+						   indexRelation,
+						   snapshot,
+						   instrument,
+						   nkeys, norderbys);
+
+	scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
+
+	return scan;
+}
+
 /*
  * index_beginscan_bitmap - start a scan of an index with amgetbitmap
  *
@@ -610,6 +636,26 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel,
 	return scan;
 }
 
+/*
+ * Parallel version of index_beginscan_vmset()
+ */
+IndexScanDesc
+index_beginscan_parallel_vmset(Relation heaprel, Relation indexrel,
+							   IndexScanInstrumentation *instrument,
+							   int nkeys, int norderbys,
+							   ParallelIndexScanDesc pscan,
+							   bool modifies_base_rel)
+{
+	IndexScanDesc scan;
+
+	scan = index_beginscan_parallel(heaprel, indexrel,
+									instrument,
+									nkeys, norderbys,
+									pscan);
+	scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
+	return scan;
+}
+
 /* ----------------
  * index_getnext_tid - get the next TID from a scan
  *
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index a56c5eceb14..67dbf99f5b5 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -49,6 +49,10 @@
 char	   *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
 bool		synchronize_seqscans = true;
 
+/* Helper for table_beginscan_parallel() and table_beginscan_parallel_vmset() */
+static TableScanDesc table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
+													 uint32 flags);
+
 
 /* ----------------------------------------------------------------------------
  * Slot functions.
@@ -162,12 +166,14 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
 	}
 }
 
-TableScanDesc
-table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
+/*
+ * Common helper for table_beginscan_parallel() and table_beginscan_parallel_vmset()
+ */
+static TableScanDesc
+table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
+								uint32 flags)
 {
 	Snapshot	snapshot;
-	uint32		flags = SO_TYPE_SEQSCAN |
-		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
 
 	Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
 
@@ -188,6 +194,31 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
 											pscan, flags);
 }
 
+TableScanDesc
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	return table_beginscan_parallel_common(relation, pscan, flags);
+}
+
+/*
+ * Parallel version of table_beginscan_vmset()
+ */
+TableScanDesc
+table_beginscan_parallel_vmset(Relation relation, ParallelTableScanDesc pscan,
+							   bool modifies_rel)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
+	return table_beginscan_parallel_common(relation, pscan, flags);
+}
+
 
 /* ----------------------------------------------------------------------------
  * Index scan related functions.
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 0391798dd2c..065676eb7cf 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -917,6 +917,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 					break;
 			}
 
+			/* Conservatively treat any relation with a rowmark as modified */
+			estate->es_modified_relids = bms_add_member(estate->es_modified_relids,
+														rc->rti);
+
 			/* Check that relation is a legal target for marking */
 			if (relation)
 				CheckValidRowMarkRel(relation, rc->markType);
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index fdc65c2b42b..28a06dcd244 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -893,6 +893,8 @@ ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo,
 		estate->es_result_relations = (ResultRelInfo **)
 			palloc0(estate->es_range_table_size * sizeof(ResultRelInfo *));
 	estate->es_result_relations[rti - 1] = resultRelInfo;
+	estate->es_modified_relids = bms_add_member(estate->es_modified_relids,
+												rti);
 
 	/*
 	 * Saving in the list allows to avoid needlessly traversing the whole
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index bf24f3d7fe0..af6db9f7919 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -105,11 +105,16 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 	 */
 	if (!node->ss.ss_currentScanDesc)
 	{
+		bool		modifies_rel =
+			bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid,
+						  node->ss.ps.state->es_modified_relids);
+
 		node->ss.ss_currentScanDesc =
 			table_beginscan_bm(node->ss.ss_currentRelation,
 							   node->ss.ps.state->es_snapshot,
 							   0,
-							   NULL);
+							   NULL,
+							   modifies_rel);
 	}
 
 	node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 7fcaa37fe62..c2ffbd3b08e 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -102,16 +102,22 @@ IndexNext(IndexScanState *node)
 
 	if (scandesc == NULL)
 	{
+
+		bool		modifies_base_rel =
+			bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid,
+						  estate->es_modified_relids);
+
 		/*
 		 * We reach here if the index scan is not parallel, or if we're
 		 * serially executing an index scan that was planned to be parallel.
 		 */
-		scandesc = index_beginscan(node->ss.ss_currentRelation,
-								   node->iss_RelationDesc,
-								   estate->es_snapshot,
-								   &node->iss_Instrument,
-								   node->iss_NumScanKeys,
-								   node->iss_NumOrderByKeys);
+		scandesc = index_beginscan_vmset(node->ss.ss_currentRelation,
+										 node->iss_RelationDesc,
+										 estate->es_snapshot,
+										 &node->iss_Instrument,
+										 node->iss_NumScanKeys,
+										 node->iss_NumOrderByKeys,
+										 modifies_base_rel);
 
 		node->iss_ScanDesc = scandesc;
 
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index ed35c58c2c3..15e1853027b 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -65,13 +65,18 @@ SeqNext(SeqScanState *node)
 
 	if (scandesc == NULL)
 	{
+		bool		modifies_rel =
+			bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid,
+						  estate->es_modified_relids);
+
 		/*
 		 * We reach here if the scan is not parallel, or if we're serially
 		 * executing a scan that was planned to be parallel.
 		 */
-		scandesc = table_beginscan(node->ss.ss_currentRelation,
-								   estate->es_snapshot,
-								   0, NULL);
+		scandesc = table_beginscan_vmset(node->ss.ss_currentRelation,
+										 estate->es_snapshot,
+										 0, NULL, modifies_rel);
+
 		node->ss.ss_currentScanDesc = scandesc;
 	}
 
@@ -362,6 +367,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
 						 ParallelContext *pcxt)
 {
 	EState	   *estate = node->ss.ps.state;
+	bool		modifies_rel;
 	ParallelTableScanDesc pscan;
 
 	pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
@@ -369,8 +375,11 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
 								  pscan,
 								  estate->es_snapshot);
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
+	modifies_rel = bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid,
+								 estate->es_modified_relids);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel_vmset(node->ss.ss_currentRelation, pscan,
+									   modifies_rel);
 }
 
 /* ----------------------------------------------------------------
@@ -400,8 +409,13 @@ ExecSeqScanInitializeWorker(SeqScanState *node,
 							ParallelWorkerContext *pwcxt)
 {
 	ParallelTableScanDesc pscan;
+	bool		modifies_rel =
+		bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid,
+					  node->ss.ps.state->es_modified_relids);
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
-		table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+		table_beginscan_parallel_vmset(node->ss.ss_currentRelation,
+									   pscan,
+									   modifies_rel);
 }
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 5b2ab181b5f..bf272c2c37f 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -180,6 +180,11 @@ extern IndexScanDesc index_beginscan(Relation heapRelation,
 									 Snapshot snapshot,
 									 IndexScanInstrumentation *instrument,
 									 int nkeys, int norderbys);
+extern IndexScanDesc index_beginscan_vmset(Relation heapRelation,
+										   Relation indexRelation,
+										   Snapshot snapshot,
+										   IndexScanInstrumentation *instrument,
+										   int nkeys, int norderbys, bool modifies_heap_rel);
 extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
 											Snapshot snapshot,
 											IndexScanInstrumentation *instrument,
@@ -206,6 +211,12 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
 											  IndexScanInstrumentation *instrument,
 											  int nkeys, int norderbys,
 											  ParallelIndexScanDesc pscan);
+
+extern IndexScanDesc index_beginscan_parallel_vmset(Relation heaprel, Relation indexrel,
+													IndexScanInstrumentation *instrument,
+													int nkeys, int norderbys,
+													ParallelIndexScanDesc pscan,
+													bool modifies_rel);
 extern ItemPointer index_getnext_tid(IndexScanDesc scan,
 									 ScanDirection direction);
 struct TupleTableSlot;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4278f351bdf..16f7904a21e 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -94,6 +94,13 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+	/*
+	 * For sequential scans and bitmap heap scans. If the relation is not
+	 * being modified, on-access pruning may read in the current heap page's
+	 * corresponding VM block to this buffer.
+	 */
+	Buffer		rs_vmbuffer;
+
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
 	uint32		rs_cindex;		/* current tuple's index in vistuples */
 	uint32		rs_ntuples;		/* number of visible tuples on page */
@@ -116,8 +123,18 @@ typedef struct IndexFetchHeapData
 {
 	IndexFetchTableData xs_base;	/* AM independent part of the descriptor */
 
-	Buffer		xs_cbuf;		/* current heap buffer in scan, if any */
-	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+	/*
+	 * Current heap buffer in scan, if any. NB: if xs_cbuf is not
+	 * InvalidBuffer, we hold a pin on that buffer.
+	 */
+	Buffer		xs_cbuf;
+
+	/*
+	 * For index scans that do not modify the underlying heap table, on-access
+	 * pruning may read in the current heap page's corresponding VM block to
+	 * this buffer.
+	 */
+	Buffer		xs_vmbuffer;
 } IndexFetchHeapData;
 
 /* Result codes for HeapTupleSatisfiesVacuum */
@@ -374,7 +391,8 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 
 /* in heap/pruneheap.c */
 struct GlobalVisState;
-extern void heap_page_prune_opt(Relation relation, Buffer buffer);
+extern void heap_page_prune_opt(Relation relation, Buffer buffer,
+								Buffer *vmbuffer);
 extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 									   bool blk_known_av,
 									   Buffer vmbuffer,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index b5e0fb386c0..f496e0b4939 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -121,6 +121,12 @@ typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker;
 typedef struct IndexFetchTableData
 {
 	Relation	rel;
+
+	/*
+	 * Some optimizations can only be performed if the query does not modify
+	 * the underlying relation. Track that here.
+	 */
+	bool		modifies_base_rel;
 } IndexFetchTableData;
 
 struct IndexScanInstrumentation;
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 1c9e802a6b1..0e986d8ef72 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -62,6 +62,8 @@ typedef enum ScanOptions
 
 	/* unregister snapshot at scan end? */
 	SO_TEMP_SNAPSHOT = 1 << 9,
+	/* whether or not scan should attempt to set the VM */
+	SO_ALLOW_VM_SET = 1 << 10,
 }			ScanOptions;
 
 /*
@@ -876,6 +878,25 @@ table_beginscan(Relation rel, Snapshot snapshot,
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
+/*
+ * Similar to table_beginscan(), but allows the caller to indicate whether the
+ * query modifies the relation. This is used when the caller wants to attempt
+ * marking pages in the relation as all-visible in the visibility map during
+ * on-access pruning.
+ */
+static inline TableScanDesc
+table_beginscan_vmset(Relation rel, Snapshot snapshot,
+					  int nkeys, struct ScanKeyData *key, bool modifies_rel)
+{
+	uint32		flags = SO_TYPE_SEQSCAN |
+		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
+
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
+	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+}
+
 /*
  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
  * snapshot appropriate for scanning catalog relations.
@@ -913,10 +934,13 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
-				   int nkeys, struct ScanKeyData *key)
+				   int nkeys, struct ScanKeyData *key, bool modifies_rel)
 {
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
+	if (!modifies_rel)
+		flags |= SO_ALLOW_VM_SET;
+
 	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
 									   NULL, flags);
 }
@@ -1125,6 +1149,10 @@ extern void table_parallelscan_initialize(Relation rel,
 extern TableScanDesc table_beginscan_parallel(Relation relation,
 											  ParallelTableScanDesc pscan);
 
+extern TableScanDesc table_beginscan_parallel_vmset(Relation relation,
+													ParallelTableScanDesc pscan,
+													bool modifies_rel);
+
 /*
  * Restart a parallel scan.  Call this in the leader process.  Caller is
  * responsible for making sure that all workers have finished the scan
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e107d6e5f81..326d7d78860 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -680,6 +680,12 @@ typedef struct EState
 									 * ExecDoInitialPruning() */
 	const char *es_sourceText;	/* Source text from QueryDesc */
 
+	/*
+	 * RT indexes of relations modified by the query either through
+	 * UPDATE/DELETE/INSERT/MERGE or SELECT FOR UPDATE
+	 */
+	Bitmapset  *es_modified_relids;
+
 	JunkFilter *es_junkFilter;	/* top-level junk filter, if any */
 
 	/* If query can insert/delete tuples, the command ID to mark them with */
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 921813483e3..5d0863a7933 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -9,6 +9,7 @@ use warnings FATAL => 'all';
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
 use Test::More;
+use Time::HiRes qw(usleep);
 
 if ($ENV{enable_injection_points} ne 'yes')
 {
@@ -295,6 +296,7 @@ wal_level = 'logical'
 max_replication_slots = 4
 max_wal_senders = 4
 autovacuum = off
+hot_standby_feedback = on
 });
 $node_primary->dump_info;
 $node_primary->start;
@@ -744,7 +746,7 @@ check_pg_recvlogical_stderr($handle,
 $logstart = -s $node_standby->logfile;
 
 reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
-	'no_conflict_', 0, 1);
+	'no_conflict_', 1, 0);
 
 # This should not trigger a conflict
 wait_until_vacuum_can_remove(
@@ -754,12 +756,12 @@ wait_until_vacuum_can_remove(
 
 # message should not be issued
 ok( !$node_standby->log_contains(
-		"invalidating obsolete slot \"no_conflict_inactiveslot\"", $logstart),
+		"invalidating obsolete replication slot \"no_conflict_inactiveslot\"", $logstart),
 	'inactiveslot slot invalidation is not logged with vacuum on conflict_test'
 );
 
 ok( !$node_standby->log_contains(
-		"invalidating obsolete slot \"no_conflict_activeslot\"", $logstart),
+		"invalidating obsolete replication slot \"no_conflict_activeslot\"", $logstart),
 	'activeslot slot invalidation is not logged with vacuum on conflict_test'
 );
 
-- 
2.43.0

