From 1a621864420cd17db1bef0546990d8298e3a3d34 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 6 Sep 2023 16:54:41 -0400
Subject: [PATCH v4 3/3] Reuse heap_page_prune() tuple visibility statuses

heap_page_prune() obtains the HTSV_Result (tuple visibility status)
returned from HeapTupleSatisfiesVacuum() for every tuple on the page and
stores them in an array. By making this array available to
heap_page_prune()'s caller lazy_scan_prune(), we can avoid an additional
call to HeapTupleSatisfiesVacuum() when freezing the tuples and
recording LP_DEAD items for vacuum. This saves resources and eliminates
the possibility that vacuuming corrupted data results in a hang due to
endless retry looping.

This replaces the retry mechanism introduced in 8523492d4 to handle cases in
which a tuple's inserting transaction aborted between the visibility check in
heap_page_prune() and lazy_scan_prune()'s call to HeapTupleSatisfiesVacuum() --
rendering it dead but without a dead line pointer. We can instead reuse the
tuple's original visibility status, circumventing any disagreements.

Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com
---
 src/backend/access/heap/pruneheap.c  | 37 ++++++++++++------------
 src/backend/access/heap/vacuumlazy.c | 42 ++++++++--------------------
 src/include/access/heapam.h          | 11 ++++++++
 3 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 5ac286e152..256039c6be 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -53,16 +53,6 @@ typedef struct
 	 * 1. Otherwise every access would need to subtract 1.
 	 */
 	bool		marked[MaxHeapTuplesPerPage + 1];
-
-	/*
-	 * Tuple visibility is only computed once for each tuple, for correctness
-	 * and efficiency reasons; see comment in heap_page_prune() for details.
-	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
-	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
-	 *
-	 * Same indexing as ->marked.
-	 */
-	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
 											   Buffer buffer);
 static int	heap_prune_chain(Buffer buffer,
 							 OffsetNumber rootoffnum,
+							 int8 *htsv,
 							 PruneState *prstate);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
@@ -243,6 +234,10 @@ heap_page_prune(Relation relation, Buffer buffer,
 	prstate.nredirected = prstate.ndead = prstate.nunused = 0;
 	memset(prstate.marked, 0, sizeof(prstate.marked));
 
+	/*
+	 * presult->htsv is not initialized here because all ntuple spots in the
+	 * array will be set either to a valid HTSV_Result value or -1.
+	 */
 	presult->ndeleted = 0;
 	presult->nnewlpdead = 0;
 
@@ -279,7 +274,7 @@ heap_page_prune(Relation relation, Buffer buffer,
 		/* Nothing to do if slot doesn't contain a tuple */
 		if (!ItemIdIsNormal(itemid))
 		{
-			prstate.htsv[offnum] = -1;
+			presult->htsv[offnum] = -1;
 			continue;
 		}
 
@@ -295,8 +290,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-														   buffer);
+		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+															buffer);
 	}
 
 	/* Scan the page */
@@ -320,7 +315,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 			continue;
 
 		/* Process this item or chain of items */
-		presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
+		presult->ndeleted += heap_prune_chain(buffer, offnum,
+											  presult->htsv, &prstate);
 	}
 
 	/* Clear the offset information once we have processed the given page. */
@@ -449,6 +445,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
+ * Tuple visibility information is provided in htsv.
+ *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
@@ -476,7 +474,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
  * Returns the number of tuples (to be) deleted from the page.
  */
 static int
-heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
+heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
+				 int8 *htsv, PruneState *prstate)
 {
 	int			ndeleted = 0;
 	Page		dp = (Page) BufferGetPage(buffer);
@@ -497,7 +496,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 	 */
 	if (ItemIdIsNormal(rootlp))
 	{
-		Assert(prstate->htsv[rootoffnum] != -1);
+		Assert(htsv[rootoffnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
 		if (HeapTupleHeaderIsHeapOnly(htup))
@@ -520,7 +519,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 			 * either here or while following a chain below.  Whichever path
 			 * gets there first will mark the tuple unused.
 			 */
-			if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+			if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
 				!HeapTupleHeaderIsHotUpdated(htup))
 			{
 				heap_prune_record_unused(prstate, rootoffnum);
@@ -588,7 +587,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 			break;
 
 		Assert(ItemIdIsNormal(lp));
-		Assert(prstate->htsv[offnum] != -1);
+		Assert(htsv[offnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, lp);
 
 		/*
@@ -608,7 +607,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 		 */
 		tupdead = recent_dead = false;
 
-		switch ((HTSV_Result) prstate->htsv[offnum])
+		switch ((HTSV_Result) htsv[offnum])
 		{
 			case HEAPTUPLE_DEAD:
 				tupdead = true;
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 7ead9cfe9d..6c328f3ed5 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1524,12 +1524,12 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
  * but nevertheless were left with storage after pruning.
  *
- * The approach we take now is to restart pruning when the race condition is
- * detected.  This allows heap_page_prune() to prune the tuples inserted by
- * the now-aborted transaction.  This is a little crude, but it guarantees
- * that any items that make it into the dead_items array are simple LP_DEAD
- * line pointers, and that every remaining item with tuple storage is
- * considered as a candidate for freezing.
+ * As of Postgres 17, we circumvent this problem altogether by reusing the
+ * result of heap_page_prune()'s visibility check. Without the second call to
+ * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
+ * disagreement. The tuple's TID won't be added to the array of dead tuple TIDs
+ * for vacuum, thus vacuum will never be tasked with reaping a tuple with
+ * storage.
  */
 static void
 lazy_scan_prune(LVRelState *vacrel,
@@ -1542,8 +1542,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	OffsetNumber offnum,
 				maxoff;
 	ItemId		itemid;
-	HeapTupleData tuple;
-	HTSV_Result res;
 	PruneResult presult;
 	int			tuples_frozen,
 				lpdead_items,
@@ -1563,8 +1561,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 
-retry:
-
 	/* Initialize (or reset) page-level state */
 	pagefrz.freeze_required = false;
 	pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
@@ -1604,6 +1600,7 @@ retry:
 		 offnum <= maxoff;
 		 offnum = OffsetNumberNext(offnum))
 	{
+		HeapTupleHeader htup;
 		bool		totally_frozen;
 
 		/*
@@ -1646,22 +1643,7 @@ retry:
 
 		Assert(ItemIdIsNormal(itemid));
 
-		ItemPointerSet(&(tuple.t_self), blkno, offnum);
-		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-		tuple.t_len = ItemIdGetLength(itemid);
-		tuple.t_tableOid = RelationGetRelid(rel);
-
-		/*
-		 * DEAD tuples are almost always pruned into LP_DEAD line pointers by
-		 * heap_page_prune(), but it's possible that the tuple state changed
-		 * since heap_page_prune() looked.  Handle that here by restarting.
-		 * (See comments at the top of function for a full explanation.)
-		 */
-		res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
-									   buf);
-
-		if (unlikely(res == HEAPTUPLE_DEAD))
-			goto retry;
+		htup = (HeapTupleHeader) PageGetItem(page, itemid);
 
 		/*
 		 * The criteria for counting a tuple as live in this block need to
@@ -1682,7 +1664,7 @@ retry:
 		 * (Cases where we bypass index vacuuming will violate this optimistic
 		 * assumption, but the overall impact of that should be negligible.)
 		 */
-		switch (res)
+		switch ((HTSV_Result) presult.htsv[offnum])
 		{
 			case HEAPTUPLE_LIVE:
 
@@ -1704,7 +1686,7 @@ retry:
 				{
 					TransactionId xmin;
 
-					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+					if (!HeapTupleHeaderXminCommitted(htup))
 					{
 						prunestate->all_visible = false;
 						break;
@@ -1714,7 +1696,7 @@ retry:
 					 * The inserter definitely committed. But is it old enough
 					 * that everyone sees it as committed?
 					 */
-					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+					xmin = HeapTupleHeaderGetXmin(htup);
 					if (!TransactionIdPrecedes(xmin,
 											   vacrel->cutoffs.OldestXmin))
 					{
@@ -1768,7 +1750,7 @@ retry:
 		prunestate->hastup = true;	/* page makes rel truncation unsafe */
 
 		/* Tuple with storage -- consider need to freeze */
-		if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+		if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
 									  &frozen[tuples_frozen], &totally_frozen))
 		{
 			/* Save prepared freeze plan for later */
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 2d3f149e4f..864ae561c4 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -198,6 +198,17 @@ typedef struct PruneResult
 {
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
+
+	/*
+	 * Tuple visibility is only computed once for each tuple, for correctness
+	 * and efficiency reasons; see comment in heap_page_prune() for details.
+	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
+	 *
+	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+	 * 1. Otherwise every access would need to subtract 1.
+	 */
+	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneResult;
 
 /* ----------------
-- 
2.37.2

