From 635bac1f6bd4747288656097c9423688f8ff1e0e Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Sat, 6 Jan 2024 14:01:37 -0500
Subject: [PATCH v2 03/17] heap_page_prune sets all_visible and
 frz_conflict_horizon

In order to combine the prune and freeze records, we must know whether
the page is eligible to be opportunistically frozen before pruning
finishes. Save all_visible in the PruneResult and set it to false when
we see non-removable tuples that are not visible to everyone.
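
A minimal, self-contained sketch (not part of this patch) of how
all_visible and all_visible_except_removable diverge on a page that
holds a removable dead tuple. The TupStatus enum and the page array are
hypothetical stand-ins for HeapTupleSatisfiesVacuum results; only the
two flag names come from this patch:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for HeapTupleSatisfiesVacuum results */
    typedef enum
    {
        TUP_DEAD,            /* removable: LP_UNUSED by end of VACUUM */
        TUP_LIVE_TO_ALL,     /* live and visible to everyone */
        TUP_RECENTLY_DEAD    /* dead but not yet removable */
    } TupStatus;

    int main(void)
    {
        TupStatus   page[] = {TUP_LIVE_TO_ALL, TUP_DEAD, TUP_LIVE_TO_ALL};
        int         ntups = sizeof(page) / sizeof(page[0]);
        bool        all_visible = true;
        bool        all_visible_except_removable;

        /* First pass: only non-removable tuples may clear the flag */
        for (int i = 0; i < ntups; i++)
            if (page[i] == TUP_RECENTLY_DEAD)
                all_visible = false;

        /* Snapshot before pruning: removable dead tuples are ignored */
        all_visible_except_removable = all_visible;

        /* Pruning: a DEAD tuple left as LP_DEAD clears all_visible */
        for (int i = 0; i < ntups; i++)
            if (page[i] == TUP_DEAD)
                all_visible = false;

        /* Prints 0 1: freezing may proceed; the VM bit must wait */
        printf("%d %d\n", all_visible, all_visible_except_removable);
        return 0;
    }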

We will also need to ensure that the snapshotConflictHorizon for the
combined prune + freeze record is the more conservative of the horizons
calculated for pruning and for freezing. Calculate the visibility cutoff
xid used for freezing -- the newest xmin on the page -- in
heap_page_prune() and save it in PruneResult.frz_conflict_horizon.
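
A similar sketch (again, not part of this patch) of taking the more
conservative of two conflict horizons. The comparison mirrors the
modulo-2^32 arithmetic of TransactionIdFollows(); the horizon values
below are made-up inputs:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t TransactionId;

    /* The test TransactionIdFollows() performs: is a newer than b? */
    static bool
    xid_follows(TransactionId a, TransactionId b)
    {
        return (int32_t) (a - b) > 0;
    }

    int main(void)
    {
        TransactionId prune_horizon = 742; /* made up: from pruning */
        TransactionId frz_horizon = 755;   /* made up: newest xmin frozen */
        TransactionId combined;

        /* The combined record must use the newer (more conservative) one */
        combined = xid_follows(frz_horizon, prune_horizon) ?
            frz_horizon : prune_horizon;

        printf("snapshotConflictHorizon = %u\n", combined);  /* 755 */
        return 0;
    }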
---
 src/backend/access/heap/pruneheap.c  | 122 +++++++++++++++++++++++++--
 src/backend/access/heap/vacuumlazy.c | 116 +++++++------------------
 src/include/access/heapam.h          |   3 +
 3 files changed, 146 insertions(+), 95 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 4600ee53751..b3a7ce06699 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -65,8 +65,10 @@ static int	heap_prune_chain(Buffer buffer,
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
 									   OffsetNumber offnum, OffsetNumber rdoffnum);
-static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
-static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
+static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
+								   PruneResult *presult);
+static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
+											 PruneResult *presult);
 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
 static void page_verify_redirects(Page page);
 
@@ -249,6 +251,14 @@ heap_page_prune(Relation relation, Buffer buffer,
 	presult->ndeleted = 0;
 	presult->nnewlpdead = 0;
 
+	/*
+	 * Keep track of whether or not the page is all_visible in case the caller
+	 * wants to use this information to update the VM.
+	 */
+	presult->all_visible = true;
+	/* for recovery conflicts */
+	presult->frz_conflict_horizon = InvalidTransactionId;
+
 	maxoff = PageGetMaxOffsetNumber(page);
 	tup.t_tableOid = RelationGetRelid(prstate.rel);
 
@@ -300,8 +310,92 @@ heap_page_prune(Relation relation, Buffer buffer,
 
 		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
 															buffer);
+		switch (presult->htsv[offnum])
+		{
+			case HEAPTUPLE_DEAD:
+
+				/*
+				 * Deliberately delay unsetting all_visible until later during
+				 * pruning. Removable dead tuples shouldn't preclude freezing
+				 * the page. After finishing this first pass of tuple
+				 * visibility checks, initialize all_visible_except_removable
+				 * with the current value of all_visible to indicate whether
+				 * or not the page is all visible except for dead tuples. This
+				 * will allow us to attempt to freeze the page after pruning.
+				 * Later during pruning, if we encounter an LP_DEAD item or
+				 * are setting an item LP_DEAD, we will unset all_visible. As
+				 * long as we unset it before updating the visibility map,
+				 * this will be correct.
+				 */
+				break;
+			case HEAPTUPLE_LIVE:
+
+				/*
+				 * Is the tuple definitely visible to all transactions?
+				 *
+				 * NB: Like with per-tuple hint bits, we can't set the
+				 * PD_ALL_VISIBLE flag if the inserter committed
+				 * asynchronously. See SetHintBits for more info. Check that
+				 * the tuple is hinted xmin-committed because of that.
+				 */
+				if (presult->all_visible)
+				{
+					TransactionId xmin;
+
+					if (!HeapTupleHeaderXminCommitted(htup))
+					{
+						presult->all_visible = false;
+						break;
+					}
+
+					/*
+					 * The inserter definitely committed. But is it old enough
+					 * that everyone sees it as committed?
+					 */
+					xmin = HeapTupleHeaderGetXmin(htup);
+					if (!GlobalVisTestIsRemovableXid(vistest, xmin))
+					{
+						presult->all_visible = false;
+						break;
+					}
+
+					/* Track newest xmin on page. */
+					if (TransactionIdFollows(xmin, presult->frz_conflict_horizon) &&
+						TransactionIdIsNormal(xmin))
+						presult->frz_conflict_horizon = xmin;
+				}
+				break;
+			case HEAPTUPLE_RECENTLY_DEAD:
+				presult->all_visible = false;
+				break;
+			case HEAPTUPLE_INSERT_IN_PROGRESS:
+				presult->all_visible = false;
+				break;
+			case HEAPTUPLE_DELETE_IN_PROGRESS:
+				/* This is an expected case during concurrent vacuum */
+				presult->all_visible = false;
+				break;
+			default:
+				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+				break;
+		}
 	}
 
+	/*
+	 * For vacuum, we consider opportunistically freezing tuples only if the
+	 * whole page will become all-visible once pruning is done. Dead tuples
+	 * which will be removed by the end of vacuuming should not preclude
+	 * freezing, but tuples which are not visible to everyone and dead tuples
+	 * which are not yet removable do. We also need all_visible to be false
+	 * if LP_DEAD tuples remain after pruning so that we do not incorrectly
+	 * update the visibility map or the page hint bit. So, save the current
+	 * value in all_visible_except_removable to permit freezing if the whole
+	 * page will eventually become all-visible after removing those tuples,
+	 * and keep updating presult->all_visible during pruning to reflect any
+	 * LP_DEAD items that remain.
+	 */
+	presult->all_visible_except_removable = presult->all_visible;
+
 	/* Scan the page */
 	for (offnum = FirstOffsetNumber;
 		 offnum <= maxoff;
@@ -596,10 +690,14 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 			/*
 			 * If the caller set mark_unused_now true, we can set dead line
 			 * pointers LP_UNUSED now. We don't increment ndeleted here since
-			 * the LP was already marked dead.
+			 * the LP was already marked dead. If it will not be marked
+			 * LP_UNUSED, it will remain LP_DEAD, making the page not
+			 * all_visible.
 			 */
 			if (unlikely(prstate->mark_unused_now))
 				heap_prune_record_unused(prstate, offnum);
+			else
+				presult->all_visible = false;
 
 			break;
 		}
@@ -736,7 +834,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 		 * redirect the root to the correct chain member.
 		 */
 		if (i >= nchain)
-			heap_prune_record_dead_or_unused(prstate, rootoffnum);
+			heap_prune_record_dead_or_unused(prstate, rootoffnum, presult);
 		else
 			heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
 	}
@@ -749,7 +847,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 		 * redirect item.  We can clean up by setting the redirect item to
 		 * DEAD state or LP_UNUSED if the caller indicated.
 		 */
-		heap_prune_record_dead_or_unused(prstate, rootoffnum);
+		heap_prune_record_dead_or_unused(prstate, rootoffnum, presult);
 	}
 
 	return ndeleted;
@@ -786,13 +884,20 @@ heap_prune_record_redirect(PruneState *prstate,
 
 /* Record line pointer to be marked dead */
 static void
-heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
+					   PruneResult *presult)
 {
 	Assert(prstate->ndead < MaxHeapTuplesPerPage);
 	prstate->nowdead[prstate->ndead] = offnum;
 	prstate->ndead++;
 	Assert(!prstate->marked[offnum]);
 	prstate->marked[offnum] = true;
+
+	/*
+	 * Setting the line pointer LP_DEAD means the page will definitely not be
+	 * all_visible.
+	 */
+	presult->all_visible = false;
 }
 
 /*
@@ -802,7 +907,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
  * pointers LP_DEAD if mark_unused_now is true.
  */
 static void
-heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
+								 PruneResult *presult)
 {
 	/*
 	 * If the caller set mark_unused_now to true, we can remove dead tuples
@@ -813,7 +919,7 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
 	if (unlikely(prstate->mark_unused_now))
 		heap_prune_record_unused(prstate, offnum);
 	else
-		heap_prune_record_dead(prstate, offnum);
+		heap_prune_record_dead(prstate, offnum, presult);
 }
 
 /* Record line pointer to be marked unused */
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index d1efd885c88..f9892f4cd08 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1422,9 +1422,7 @@ lazy_scan_prune(LVRelState *vacrel,
 				recently_dead_tuples;
 	HeapPageFreeze pagefrz;
 	bool		hastup = false;
-	bool		all_visible,
-				all_frozen;
-	TransactionId visibility_cutoff_xid;
+	bool		all_frozen;
 	int64		fpi_before = pgWalUsage.wal_fpi;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 	HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
@@ -1465,17 +1463,16 @@ lazy_scan_prune(LVRelState *vacrel,
 					&presult, &vacrel->offnum);
 
 	/*
-	 * We will update the VM after collecting LP_DEAD items and freezing
-	 * tuples. Keep track of whether or not the page is all_visible and
-	 * all_frozen and use this information to update the VM. all_visible
-	 * implies 0 lpdead_items, but don't trust all_frozen result unless
-	 * all_visible is also set to true.
+	 * Now scan the page to collect LP_DEAD items and check for tuples
+	 * requiring freezing among the remaining tuples with storage. We will
+	 * update the VM after collecting LP_DEAD items and freezing tuples.
+	 * Pruning will have determined whether or not the page is all_visible.
+	 * Keep track of whether or not the page is all_frozen and use this
+	 * information to update the VM. all_visible implies lpdead_items == 0,
+	 * but don't trust the all_frozen result unless all_visible is also true.
 	 *
-	 * Also keep track of the visibility cutoff xid for recovery conflicts.
 	 */
-	all_visible = true;
 	all_frozen = true;
-	visibility_cutoff_xid = InvalidTransactionId;
 
 	/*
 	 * Now scan the page to collect LP_DEAD items and update the variables set
@@ -1516,11 +1513,6 @@ lazy_scan_prune(LVRelState *vacrel,
 			 * will only happen every other VACUUM, at most.  Besides, VACUUM
 			 * must treat hastup/nonempty_pages as provisional no matter how
 			 * LP_DEAD items are handled (handled here, or handled later on).
-			 *
-			 * Also deliberately delay unsetting all_visible until just before
-			 * we return to lazy_scan_heap caller, as explained in full below.
-			 * (This is another case where it's useful to anticipate that any
-			 * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
 			 */
 			deadoffsets[lpdead_items++] = offnum;
 			continue;
@@ -1558,41 +1550,6 @@ lazy_scan_prune(LVRelState *vacrel,
 				 * what acquire_sample_rows() does.
 				 */
 				live_tuples++;
-
-				/*
-				 * Is the tuple definitely visible to all transactions?
-				 *
-				 * NB: Like with per-tuple hint bits, we can't set the
-				 * PD_ALL_VISIBLE flag if the inserter committed
-				 * asynchronously. See SetHintBits for more info. Check that
-				 * the tuple is hinted xmin-committed because of that.
-				 */
-				if (all_visible)
-				{
-					TransactionId xmin;
-
-					if (!HeapTupleHeaderXminCommitted(htup))
-					{
-						all_visible = false;
-						break;
-					}
-
-					/*
-					 * The inserter definitely committed. But is it old enough
-					 * that everyone sees it as committed?
-					 */
-					xmin = HeapTupleHeaderGetXmin(htup);
-					if (!GlobalVisTestIsRemovableXid(vacrel->vistest, xmin))
-					{
-						all_visible = false;
-						break;
-					}
-
-					/* Track newest xmin on page. */
-					if (TransactionIdFollows(xmin, visibility_cutoff_xid) &&
-						TransactionIdIsNormal(xmin))
-						visibility_cutoff_xid = xmin;
-				}
 				break;
 			case HEAPTUPLE_RECENTLY_DEAD:
 
@@ -1602,7 +1559,6 @@ lazy_scan_prune(LVRelState *vacrel,
 				 * pruning.)
 				 */
 				recently_dead_tuples++;
-				all_visible = false;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 
@@ -1613,16 +1569,13 @@ lazy_scan_prune(LVRelState *vacrel,
 				 * results.  This assumption is a bit shaky, but it is what
 				 * acquire_sample_rows() does, so be consistent.
 				 */
-				all_visible = false;
 				break;
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
-				/* This is an expected case during concurrent vacuum */
-				all_visible = false;
 
 				/*
-				 * Count such rows as live.  As above, we assume the deleting
-				 * transaction will commit and update the counters after we
-				 * report.
+				 * This is an expected case during concurrent vacuum. Count such
+				 * rows as live.  As above, we assume the deleting transaction
+				 * will commit and update the counters after we report.
 				 */
 				live_tuples++;
 				break;
@@ -1665,7 +1618,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	 * page all-frozen afterwards (might not happen until final heap pass).
 	 */
 	if (pagefrz.freeze_required || tuples_frozen == 0 ||
-		(all_visible && all_frozen &&
+		(presult.all_visible_except_removable && all_frozen &&
 		 fpi_before != pgWalUsage.wal_fpi))
 	{
 		/*
@@ -1698,16 +1651,16 @@ lazy_scan_prune(LVRelState *vacrel,
 			vacrel->frozen_pages++;
 
 			/*
-			 * We can use visibility_cutoff_xid as our cutoff for conflicts
+			 * We can use frz_conflict_horizon as our cutoff for conflicts
 			 * when the whole page is eligible to become all-frozen in the VM
 			 * once we're done with it.  Otherwise we generate a conservative
 			 * cutoff by stepping back from OldestXmin.
 			 */
-			if (all_visible && all_frozen)
+			if (presult.all_visible_except_removable && all_frozen)
 			{
 				/* Using same cutoff when setting VM is now unnecessary */
-				snapshotConflictHorizon = visibility_cutoff_xid;
-				visibility_cutoff_xid = InvalidTransactionId;
+				snapshotConflictHorizon = presult.frz_conflict_horizon;
+				presult.frz_conflict_horizon = InvalidTransactionId;
 			}
 			else
 			{
@@ -1743,17 +1696,19 @@ lazy_scan_prune(LVRelState *vacrel,
 	 */
 #ifdef USE_ASSERT_CHECKING
 	/* Note that all_frozen value does not matter when !all_visible */
-	if (all_visible && lpdead_items == 0)
+	if (presult.all_visible)
 	{
 		TransactionId debug_cutoff;
 		bool		debug_all_frozen;
 
+		Assert(lpdead_items == 0);
+
 		if (!heap_page_is_all_visible(vacrel, buf,
 									  &debug_cutoff, &debug_all_frozen))
 			Assert(false);
 
 		Assert(!TransactionIdIsValid(debug_cutoff) ||
-			   debug_cutoff == visibility_cutoff_xid);
+			   debug_cutoff == presult.frz_conflict_horizon);
 	}
 #endif
 
@@ -1778,19 +1733,6 @@ lazy_scan_prune(LVRelState *vacrel,
 		Assert(dead_items->num_items <= dead_items->max_items);
 		pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
 									 dead_items->num_items);
-
-		/*
-		 * It was convenient to ignore LP_DEAD items in all_visible earlier on
-		 * to make the choice of whether or not to freeze the page unaffected
-		 * by the short-term presence of LP_DEAD items.  These LP_DEAD items
-		 * were effectively assumed to be LP_UNUSED items in the making.  It
-		 * doesn't matter which heap pass (initial pass or final pass) ends up
-		 * setting the page all-frozen, as long as the ongoing VACUUM does it.
-		 *
-		 * Now that freezing has been finalized, unset all_visible.  It needs
-		 * to reflect the present state of things, as expected by our caller.
-		 */
-		all_visible = false;
 	}
 
 	/* Finally, add page-local counts to whole-VACUUM counts */
@@ -1807,20 +1749,20 @@ lazy_scan_prune(LVRelState *vacrel,
 	/* Did we find LP_DEAD items? */
 	*has_lpdead_items = (lpdead_items > 0);
 
-	Assert(!all_visible || !(*has_lpdead_items));
+	Assert(!presult.all_visible || !(*has_lpdead_items));
 
 	/*
 	 * Handle setting visibility map bit based on information from the VM (as
 	 * of last heap_vac_scan_next_block() call), and from all_visible and
 	 * all_frozen variables
 	 */
-	if (!all_visible_according_to_vm && all_visible)
+	if (!all_visible_according_to_vm && presult.all_visible)
 	{
 		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
 
 		if (all_frozen)
 		{
-			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+			Assert(!TransactionIdIsValid(presult.frz_conflict_horizon));
 			flags |= VISIBILITYMAP_ALL_FROZEN;
 		}
 
@@ -1840,7 +1782,7 @@ lazy_scan_prune(LVRelState *vacrel,
 		PageSetAllVisible(page);
 		MarkBufferDirty(buf);
 		visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
-						  vmbuffer, visibility_cutoff_xid,
+						  vmbuffer, presult.frz_conflict_horizon,
 						  flags);
 	}
 
@@ -1888,7 +1830,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	 * it as all-frozen.  Note that all_frozen is only valid if all_visible is
 	 * true, so we must check both all_visible and all_frozen.
 	 */
-	else if (all_visible_according_to_vm && all_visible &&
+	else if (all_visible_according_to_vm && presult.all_visible &&
 			 all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
 	{
 		/*
@@ -1905,11 +1847,11 @@ lazy_scan_prune(LVRelState *vacrel,
 		/*
 		 * Set the page all-frozen (and all-visible) in the VM.
 		 *
-		 * We can pass InvalidTransactionId as our visibility_cutoff_xid,
-		 * since a snapshotConflictHorizon sufficient to make everything safe
-		 * for REDO was logged when the page's tuples were frozen.
+		 * We can pass InvalidTransactionId as our frz_conflict_horizon, since
+		 * a snapshotConflictHorizon sufficient to make everything safe for
+		 * REDO was logged when the page's tuples were frozen.
 		 */
-		Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+		Assert(!TransactionIdIsValid(presult.frz_conflict_horizon));
 		visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
 						  vmbuffer, InvalidTransactionId,
 						  VISIBILITYMAP_ALL_VISIBLE |
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4b133f68593..4cfaf9ea46c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -198,6 +198,8 @@ typedef struct PruneResult
 {
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
+	bool		all_visible;	/* Whether or not the page is all visible */
+	TransactionId frz_conflict_horizon; /* Newest xmin on the page */
 
 	/*
 	 * Tuple visibility is only computed once for each tuple, for correctness
@@ -209,6 +211,7 @@ typedef struct PruneResult
 	 * 1. Otherwise every access would need to subtract 1.
 	 */
 	int8		htsv[MaxHeapTuplesPerPage + 1];
+	bool		all_visible_except_removable;	/* ignores removable dead tuples */
 } PruneResult;
 
 /*
-- 
2.40.1

