From dc318358572f61efbd0e05aae2b9a077b422bcf5 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 18 Jun 2025 12:42:13 -0400
Subject: [PATCH v8 05/22] Eliminate xl_heap_visible from vacuum phase III

Instead of emitting a separate xl_heap_visible record for each page that
is rendered all-visible by vacuum's third phase, include the updates to
the VM in the already emitted xl_heap_prune record.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/heapam_xlog.c  | 143 +++++++++++++++++++++---
 src/backend/access/heap/pruneheap.c    |  48 +++++++-
 src/backend/access/heap/vacuumlazy.c   | 149 +++++++++++++++++--------
 src/backend/access/rmgrdesc/heapdesc.c |  13 ++-
 src/include/access/heapam.h            |   9 ++
 src/include/access/heapam_xlog.h       |   7 +-
 6 files changed, 300 insertions(+), 69 deletions(-)

diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 0c902c87682..e68e61feade 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -35,7 +35,8 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Buffer		buffer;
 	RelFileLocator rlocator;
 	BlockNumber blkno;
-	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
+	uint8		vmflags = 0;
 
 	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
 	memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
@@ -51,10 +52,15 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		   (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
 
 	/*
-	 * We are about to remove and/or freeze tuples.  In Hot Standby mode,
-	 * ensure that there are no queries running for which the removed tuples
-	 * are still visible or which still consider the frozen xids as running.
-	 * The conflict horizon XID comes after xl_heap_prune.
+	 * After xl_heap_prune is the optional snapshot conflict horizon.
+	 *
+	 * In Hot Standby mode, we must ensure that there are no running queries
+	 * which would conflict with the changes in this record. If pruning, that
+	 * means we cannot remove tuples still visible to transactions on the
+	 * standby. If freezing, that means we cannot freeze tuples with xids that
+	 * are still considered running on the standby. And for setting the VM, we
+	 * cannot do so if the page isn't all-visible to all transactions on the
+	 * standby.
 	 */
 	if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
 	{
@@ -70,13 +76,29 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 												rlocator);
 	}
 
+	/* Next are the optionally included vmflags. Copy them out for later use. */
+	if ((xlrec.flags & XLHP_HAS_VMFLAGS) != 0)
+	{
+		/* memcpy because vmflags is stored unaligned */
+		memcpy(&vmflags, maindataptr, sizeof(uint8));
+		maindataptr += sizeof(uint8);
+
+		/*
+		 * We don't set VISIBILITYMAP_XLOG_CATALOG_REL in the combined record
+		 * because we already have XLHP_IS_CATALOG_REL.
+		 */
+		Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
+		/* Must never set all_frozen bit without also setting all_visible bit */
+		Assert(vmflags != VISIBILITYMAP_ALL_FROZEN);
+	}
+
 	/*
-	 * If we have a full-page image, restore it and we're done.
+	 * If we have a full-page image of the heap block, restore it and we're
+	 * done with the heap block.
 	 */
-	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
-										   (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
-										   &buffer);
-	if (action == BLK_NEEDS_REDO)
+	if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
+									  (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
+									  &buffer) == BLK_NEEDS_REDO)
 	{
 		Page		page = BufferGetPage(buffer);
 		OffsetNumber *redirected;
@@ -89,6 +111,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		Size		datalen;
 		xlhp_freeze_plan *plans;
 		OffsetNumber *frz_offsets;
+		bool		do_prune;
+		bool		mark_buffer_dirty;
+		bool		set_heap_lsn;
 		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
 
 		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
@@ -97,11 +122,18 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 											   &ndead, &nowdead,
 											   &nunused, &nowunused);
 
+		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+		set_heap_lsn = mark_buffer_dirty = do_prune || nplans > 0;
+
+		/* Ensure the record does something */
+		Assert(do_prune || nplans > 0 ||
+			   vmflags & VISIBILITYMAP_VALID_BITS);
+
 		/*
 		 * Update all line pointers per the record, and repair fragmentation
 		 * if needed.
 		 */
-		if (nredirected > 0 || ndead > 0 || nunused > 0)
+		if (do_prune)
 			heap_page_prune_execute(buffer,
 									(xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
 									redirected, nredirected,
@@ -138,26 +170,72 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		/* There should be no more data */
 		Assert((char *) frz_offsets == dataptr + datalen);
 
+		Assert(BufferIsValid(buffer) &&
+			   BufferGetBlockNumber(buffer) == blkno);
+
+		/*
+		 * Now set PD_ALL_VISIBLE, if required. We'll only do this if we are
+		 * also going to set bits in the VM later.
+		 *
+		 * We must never end up with the VM bit set and the page-level
+		 * PD_ALL_VISIBLE bit clear. If that were to occur, a subsequent page
+		 * modification would fail to clear the VM bit.
+		 */
+		if ((vmflags & VISIBILITYMAP_VALID_BITS) && !PageIsAllVisible(page))
+		{
+			PageSetAllVisible(page);
+
+			/*
+			 * If the only change to the heap page is setting PD_ALL_VISIBLE,
+			 * we can avoid setting the page LSN unless checksums or
+			 * wal_log_hints are enabled.
+			 */
+			set_heap_lsn = XLogHintBitIsNeeded() ? true : set_heap_lsn;
+			mark_buffer_dirty = true;
+		}
+
 		/*
 		 * Note: we don't worry about updating the page's prunability hints.
 		 * At worst this will cause an extra prune cycle to occur soon.
 		 */
 
-		PageSetLSN(page, lsn);
-		MarkBufferDirty(buffer);
+		if (mark_buffer_dirty)
+			MarkBufferDirty(buffer);
+		if (set_heap_lsn)
+			PageSetLSN(page, lsn);
 	}
 
 	/*
-	 * If we released any space or line pointers, update the free space map.
+	 * If we released any space or line pointers or will be setting a page in
+	 * the visibility map, update the free space map.
+	 *
+	 * Even if we are just updating the VM (and thus not freeing up any
+	 * space), we'll still update the FSM for this page. Since FSM is not
+	 * WAL-logged and only updated heuristically, it easily becomes stale in
+	 * standbys.  If the standby is later promoted and runs VACUUM, it will
+	 * skip updating individual free space figures for pages that became
+	 * all-visible (or all-frozen, depending on the vacuum mode), which is
+	 * troublesome when FreeSpaceMapVacuum propagates too optimistic free
+	 * space values to upper FSM layers; later inserters try to use such pages
+	 * only to find out that they are unusable.  This can cause long stalls
+	 * when there are many such pages.
+	 *
+	 * Forestall those problems by updating FSM's idea about a page that is
+	 * becoming all-visible or all-frozen.
 	 *
 	 * Do this regardless of a full-page image being applied, since the FSM
 	 * data is not in the page anyway.
+	 *
+	 * We want to avoid holding an exclusive lock on the heap buffer while
+	 * doing IO (either of the FSM or the VM), so we'll release the lock on
+	 * the heap buffer before doing either.
 	 */
 	if (BufferIsValid(buffer))
 	{
-		if (xlrec.flags & (XLHP_HAS_REDIRECTIONS |
-						   XLHP_HAS_DEAD_ITEMS |
-						   XLHP_HAS_NOW_UNUSED_ITEMS))
+		if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
+							XLHP_HAS_DEAD_ITEMS |
+							XLHP_HAS_NOW_UNUSED_ITEMS)) ||
+			vmflags & VISIBILITYMAP_VALID_BITS)
 		{
 			Size		freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
 
@@ -168,6 +246,37 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		else
 			UnlockReleaseBuffer(buffer);
 	}
+
+	/*
+	 * Read and update the VM block. Even if we skipped updating the heap page
+	 * due to the file being dropped or truncated later in recovery, it's
+	 * still safe to update the visibility map.  Any WAL record that clears
+	 * the visibility map bit does so before checking the page LSN, so any
+	 * bits that need to be cleared will still be cleared.
+	 *
+	 * Note that it is *only* okay that we do not hold a lock on the heap page
+	 * because we are in recovery and can expect no other writers to clear
+	 * PD_ALL_VISIBLE before we are able to update the VM.
+	 */
+	if (vmflags & VISIBILITYMAP_VALID_BITS &&
+		XLogReadBufferForRedoExtended(record, 1,
+									  RBM_ZERO_ON_ERROR,
+									  false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		uint8		old_vmbits = 0;
+		Relation	reln = CreateFakeRelcacheEntry(rlocator);
+
+		visibilitymap_pin(reln, blkno, &vmbuffer);
+		old_vmbits = visibilitymap_set_vmbyte(reln, blkno, vmbuffer, vmflags);
+		/* Only set VM page LSN if we modified the page */
+		if (old_vmbits != vmflags)
+			PageSetLSN(BufferGetPage(vmbuffer), lsn);
+		FreeFakeRelcacheEntry(reln);
+	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
 }
 
 /*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 7ebd22f00a3..f1a8f938e9e 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -21,6 +21,7 @@
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
+#include "access/visibilitymapdefs.h"
 #include "commands/vacuum.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
@@ -835,6 +836,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				conflict_xid = prstate.latest_xid_removed;
 
 			log_heap_prune_and_freeze(relation, buffer,
+									  InvalidBuffer, 0, false,
 									  conflict_xid,
 									  true, reason,
 									  prstate.frozen, prstate.nfrozen,
@@ -2045,12 +2047,23 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
  * replaying 'unused' items depends on whether they were all previously marked
  * as dead.
  *
+ * If the VM is being updated, vmflags will contain the bits to set. In this
+ * case, vmbuffer should already have been updated and marked dirty and should
+ * still be pinned and locked.
+ *
+ * set_pd_all_vis indicates that we set PD_ALL_VISIBLE and thus should update
+ * the page LSN when checksums/wal_log_hints are enabled even if we did not
+ * prune or freeze tuples on the page.
+ *
  * Note: This function scribbles on the 'frozen' array.
  *
  * Note: This is called in a critical section, so careful what you do here.
  */
 void
 log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+						  Buffer vmbuffer,
+						  uint8 vmflags,
+						  bool set_pd_all_vis,
 						  TransactionId conflict_xid,
 						  bool cleanup_lock,
 						  PruneReason reason,
@@ -2062,6 +2075,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xl_heap_prune xlrec;
 	XLogRecPtr	recptr;
 	uint8		info;
+	uint8		regbuf_flags;
 
 	/* The following local variables hold data registered in the WAL record: */
 	xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
@@ -2070,8 +2084,19 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xlhp_prune_items dead_items;
 	xlhp_prune_items unused_items;
 	OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
+	bool		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
 
 	xlrec.flags = 0;
+	regbuf_flags = REGBUF_STANDARD;
+
+	/*
+	 * We can avoid an FPI if the only modification we are making to the heap
+	 * page is to set PD_ALL_VISIBLE and checksums/wal_log_hints are disabled.
+	 */
+	if (!do_prune &&
+		nfrozen == 0 &&
+		(!set_pd_all_vis || !XLogHintBitIsNeeded()))
+		regbuf_flags |= REGBUF_NO_IMAGE;
 
 	/*
 	 * Prepare data for the buffer.  The arrays are not actually in the
@@ -2079,7 +2104,11 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	 * page image, the arrays can be omitted.
 	 */
 	XLogBeginInsert();
-	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	XLogRegisterBuffer(0, buffer, regbuf_flags);
+
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+		XLogRegisterBuffer(1, vmbuffer, 0);
+
 	if (nfrozen > 0)
 	{
 		int			nplans;
@@ -2136,6 +2165,8 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	 * Prepare the main xl_heap_prune record.  We already set the XLHP_HAS_*
 	 * flag above.
 	 */
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+		xlrec.flags |= XLHP_HAS_VMFLAGS;
 	if (RelationIsAccessibleInLogicalDecoding(relation))
 		xlrec.flags |= XLHP_IS_CATALOG_REL;
 	if (TransactionIdIsValid(conflict_xid))
@@ -2150,6 +2181,8 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	XLogRegisterData(&xlrec, SizeOfHeapPrune);
 	if (TransactionIdIsValid(conflict_xid))
 		XLogRegisterData(&conflict_xid, sizeof(TransactionId));
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+		XLogRegisterData(&vmflags, sizeof(uint8));
 
 	switch (reason)
 	{
@@ -2168,5 +2201,16 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	}
 	recptr = XLogInsert(RM_HEAP2_ID, info);
 
-	PageSetLSN(BufferGetPage(buffer), recptr);
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+		PageSetLSN(BufferGetPage(vmbuffer), recptr);
+
+	/*
+	 * If pruning or freezing tuples or setting the page all-visible when
+	 * checksums or wal_log_hints are enabled, we must bump the LSN. Torn
+	 * pages are possible if we update PD_ALL_VISIBLE without bumping the LSN,
+	 * but this is deemed okay for page hint updates.
+	 */
+	if (do_prune || nfrozen > 0 ||
+		(set_pd_all_vis && XLogHintBitIsNeeded()))
+		PageSetLSN(BufferGetPage(buffer), recptr);
 }
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f4e29aecf46..1d3feab4ded 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -463,11 +463,13 @@ static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *
 						   int num_offsets);
 static void dead_items_reset(LVRelState *vacrel);
 static void dead_items_cleanup(LVRelState *vacrel);
-static bool heap_page_is_all_visible(Relation rel, Buffer buf,
-									 TransactionId OldestXmin,
-									 bool *all_frozen,
-									 TransactionId *visibility_cutoff_xid,
-									 OffsetNumber *logging_offnum);
+static bool heap_page_is_all_visible_except_lpdead(Relation rel, Buffer buf,
+												   TransactionId OldestXmin,
+												   OffsetNumber *deadoffsets,
+												   int allowed_num_offsets,
+												   bool *all_frozen,
+												   TransactionId *visibility_cutoff_xid,
+												   OffsetNumber *logging_offnum);
 static void update_relstats_all_indexes(LVRelState *vacrel);
 static void vacuum_error_callback(void *arg);
 static void update_vacuum_error_info(LVRelState *vacrel,
@@ -2846,8 +2848,11 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	OffsetNumber unused[MaxHeapTuplesPerPage];
 	int			nunused = 0;
 	TransactionId visibility_cutoff_xid;
+	TransactionId conflict_xid = InvalidTransactionId;
 	bool		all_frozen;
 	LVSavedErrInfo saved_err_info;
+	uint8		vmflags = 0;
+	bool		set_pd_all_vis = false;
 
 	Assert(vacrel->do_index_vacuuming);
 
@@ -2858,6 +2863,20 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 							 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
 							 InvalidOffsetNumber);
 
+	if (heap_page_is_all_visible_except_lpdead(vacrel->rel, buffer,
+											   vacrel->cutoffs.OldestXmin,
+											   deadoffsets, num_offsets,
+											   &all_frozen, &visibility_cutoff_xid,
+											   &vacrel->offnum))
+	{
+		vmflags |= VISIBILITYMAP_ALL_VISIBLE;
+		if (all_frozen)
+		{
+			vmflags |= VISIBILITYMAP_ALL_FROZEN;
+			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+		}
+	}
+
 	START_CRIT_SECTION();
 
 	for (int i = 0; i < num_offsets; i++)
@@ -2877,6 +2896,18 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	/* Attempt to truncate line pointer array now */
 	PageTruncateLinePointerArray(page);
 
+	if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
+	{
+		Assert(!PageIsAllVisible(page));
+		set_pd_all_vis = true;
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		PageSetAllVisible(page);
+		visibilitymap_set_vmbyte(vacrel->rel,
+								 blkno,
+								 vmbuffer, vmflags);
+		conflict_xid = visibility_cutoff_xid;
+	}
+
 	/*
 	 * Mark buffer dirty before we write WAL.
 	 */
@@ -2886,7 +2917,10 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	if (RelationNeedsWAL(vacrel->rel))
 	{
 		log_heap_prune_and_freeze(vacrel->rel, buffer,
-								  InvalidTransactionId,
+								  vmbuffer,
+								  vmflags,
+								  set_pd_all_vis,
+								  conflict_xid,
 								  false,	/* no cleanup lock required */
 								  PRUNE_VACUUM_CLEANUP,
 								  NULL, 0,	/* frozen */
@@ -2895,39 +2929,12 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 								  unused, nunused);
 	}
 
-	/*
-	 * End critical section, so we safely can do visibility tests (which
-	 * possibly need to perform IO and allocate memory!). If we crash now the
-	 * page (including the corresponding vm bit) might not be marked all
-	 * visible, but that's fine. A later vacuum will fix that.
-	 */
 	END_CRIT_SECTION();
 
-	/*
-	 * Now that we have removed the LP_DEAD items from the page, once again
-	 * check if the page has become all-visible.  The page is already marked
-	 * dirty, exclusively locked, and, if needed, a full page image has been
-	 * emitted.
-	 */
-	Assert(!PageIsAllVisible(page));
-	if (heap_page_is_all_visible(vacrel->rel, buffer, vacrel->cutoffs.OldestXmin,
-								 &all_frozen, &visibility_cutoff_xid, &vacrel->offnum))
+	if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
 	{
-		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-		if (all_frozen)
-		{
-			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
-			flags |= VISIBILITYMAP_ALL_FROZEN;
-		}
-
-		PageSetAllVisible(page);
-		visibilitymap_set(vacrel->rel, blkno, buffer,
-						  InvalidXLogRecPtr,
-						  vmbuffer, visibility_cutoff_xid,
-						  flags);
-
 		/* Count the newly set VM page for logging */
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 		vacrel->vm_new_visible_pages++;
 		if (all_frozen)
 			vacrel->vm_new_visible_frozen_pages++;
@@ -3593,6 +3600,25 @@ dead_items_cleanup(LVRelState *vacrel)
 	vacrel->pvs = NULL;
 }
 
+/*
+ * Wrapper for heap_page_is_all_visible_except_lpdead() which can be used for
+ * callers that expect no LP_DEAD on the page.
+ */
+bool
+heap_page_is_all_visible(Relation rel, Buffer buf,
+						 TransactionId OldestXmin,
+						 bool *all_frozen,
+						 TransactionId *visibility_cutoff_xid,
+						 OffsetNumber *logging_offnum)
+{
+
+	return heap_page_is_all_visible_except_lpdead(rel, buf, OldestXmin,
+												  NULL, 0,
+												  all_frozen,
+												  visibility_cutoff_xid,
+												  logging_offnum);
+}
+
 /*
  * Check if every tuple in the given page is visible to all current and future
  * transactions.
@@ -3606,23 +3632,35 @@ dead_items_cleanup(LVRelState *vacrel)
  * visible tuples. Sets *all_frozen to true if every tuple on this page is
  * frozen.
  *
- * This is a stripped down version of lazy_scan_prune().  If you change
- * anything here, make sure that everything stays in sync.  Note that an
- * assertion calls us to verify that everybody still agrees.  Be sure to avoid
- * introducing new side-effects here.
+ * deadoffsets are the offsets we know about and are about to set LP_UNUSED.
+ * allowed_num_offsets is the number of those. As long as the LP_DEAD items we
+ * encounter on the page match those exactly, we can set the page all-visible
+ * in the VM.
+ *
+ * Callers looking to verify that the page is all-visible can call
+ * heap_page_is_all_visible().
+ *
+ * This is similar logic to that in heap_prune_record_unchanged_lp_normal().
+ * If you change anything here, make sure that everything stays in sync.  Note
+ * that an assertion calls us to verify that everybody still agrees.  Be sure
+ * to avoid introducing new side-effects here.
  */
 static bool
-heap_page_is_all_visible(Relation rel, Buffer buf,
-						 TransactionId OldestXmin,
-						 bool *all_frozen,
-						 TransactionId *visibility_cutoff_xid,
-						 OffsetNumber *logging_offnum)
+heap_page_is_all_visible_except_lpdead(Relation rel, Buffer buf,
+									   TransactionId OldestXmin,
+									   OffsetNumber *deadoffsets,
+									   int allowed_num_offsets,
+									   bool *all_frozen,
+									   TransactionId *visibility_cutoff_xid,
+									   OffsetNumber *logging_offnum)
 {
 	Page		page = BufferGetPage(buf);
 	BlockNumber blockno = BufferGetBlockNumber(buf);
 	OffsetNumber offnum,
 				maxoff;
 	bool		all_visible = true;
+	OffsetNumber current_dead_offsets[MaxHeapTuplesPerPage];
+	size_t		current_num_offsets = 0;
 
 	*visibility_cutoff_xid = InvalidTransactionId;
 	*all_frozen = true;
@@ -3654,9 +3692,8 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 		 */
 		if (ItemIdIsDead(itemid))
 		{
-			all_visible = false;
-			*all_frozen = false;
-			break;
+			current_dead_offsets[current_num_offsets++] = offnum;
+			continue;
 		}
 
 		Assert(ItemIdIsNormal(itemid));
@@ -3723,7 +3760,23 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 	/* Clear the offset information once we have processed the given page. */
 	*logging_offnum = InvalidOffsetNumber;
 
-	return all_visible;
+	/* If we already know it's not all-visible, return false */
+	if (!all_visible)
+		return false;
+
+	/* If we weren't allowed any dead offsets, we're done */
+	if (allowed_num_offsets == 0)
+		return current_num_offsets == 0;
+
+	/* If the number of dead offsets has changed, that's wrong */
+	if (current_num_offsets != allowed_num_offsets)
+		return false;
+
+	Assert(deadoffsets);
+
+	/* The dead offsets must be the same dead offsets */
+	return memcmp(current_dead_offsets, deadoffsets,
+				  allowed_num_offsets * sizeof(OffsetNumber)) == 0;
 }
 
 /*
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index b48d7dc1d24..d6c86ccac20 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -266,6 +266,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 {
 	char	   *rec = XLogRecGetData(record);
 	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+	char	   *maindataptr = rec + SizeOfHeapPrune;
 
 	info &= XLOG_HEAP_OPMASK;
 	if (info == XLOG_HEAP2_PRUNE_ON_ACCESS ||
@@ -278,7 +279,8 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		{
 			TransactionId conflict_xid;
 
-			memcpy(&conflict_xid, rec + SizeOfHeapPrune, sizeof(TransactionId));
+			memcpy(&conflict_xid, maindataptr, sizeof(TransactionId));
+			maindataptr += sizeof(TransactionId);
 
 			appendStringInfo(buf, "snapshotConflictHorizon: %u",
 							 conflict_xid);
@@ -287,6 +289,15 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, ", isCatalogRel: %c",
 						 xlrec->flags & XLHP_IS_CATALOG_REL ? 'T' : 'F');
 
+		if (xlrec->flags & XLHP_HAS_VMFLAGS)
+		{
+			uint8		vmflags;
+
+			memcpy(&vmflags, maindataptr, sizeof(uint8));
+			maindataptr += sizeof(uint8);
+			appendStringInfo(buf, ", vm_flags: 0x%02X", vmflags);
+		}
+
 		if (XLogRecHasBlockData(record, 0))
 		{
 			Size		datalen;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index a2bd5a897f8..8b47295efa2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -344,6 +344,12 @@ extern void heap_inplace_update_and_unlock(Relation relation,
 										   Buffer buffer);
 extern void heap_inplace_unlock(Relation relation,
 								HeapTuple oldtup, Buffer buffer);
+
+extern bool heap_page_is_all_visible(Relation rel, Buffer buf,
+									 TransactionId OldestXmin,
+									 bool *all_frozen,
+									 TransactionId *visibility_cutoff_xid,
+									 OffsetNumber *logging_offnum);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 									  const struct VacuumCutoffs *cutoffs,
 									  HeapPageFreeze *pagefrz,
@@ -388,6 +394,9 @@ extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
 									OffsetNumber *nowunused, int nunused);
 extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+									  Buffer vmbuffer,
+									  uint8 vmflags,
+									  bool set_pd_all_vis,
 									  TransactionId conflict_xid,
 									  bool cleanup_lock,
 									  PruneReason reason,
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 277df6b3cf0..d6a479f6984 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -289,12 +289,17 @@ typedef struct xl_heap_prune
 
 	/*
 	 * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows,
-	 * unaligned
+	 * unaligned.
+	 *
+	 * Then, if XLHP_HAS_VMFLAGS is set, the VM flags follow, unaligned.
 	 */
 } xl_heap_prune;
 
 #define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
 
+/* If the record should update the VM, it contains the new VM flags value */
+#define		XLHP_HAS_VMFLAGS			(1 << 0)
+
 /* to handle recovery conflict during logical decoding on standby */
 #define		XLHP_IS_CATALOG_REL			(1 << 1)
 
-- 
2.43.0

