From 1564aebed44f891b86d93b0edacb08deaf8937c6 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Fri, 29 Mar 2024 16:05:41 -0400
Subject: [PATCH v10 04/10] Handle non-chain tuples outside of
 heap_prune_chain()

Dead branches of aborted HOT chains or leftover LP_DEAD and LP_REDIRECT
line pointers can be handled outside of heap_prune_chain(). This
simplifies the logic in heap_prune_chain() as well as allowing us to
clean up more RECENTLY_DEAD -> DEAD chains.

To accomplish this efficiently, partition tuples into HOT and non-HOT
while first collecting visibility information for each tuple in
heap_page_prune(). Then call heap_prune_chain() only on potential chain
members. Then mop up the leftover HOT tuples afterwards.
---
 src/backend/access/heap/pruneheap.c | 270 ++++++++++++++++------------
 1 file changed, 154 insertions(+), 116 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 6188c5b2f3..9e6cfbf9f9 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -44,6 +44,18 @@ typedef struct
 	OffsetNumber nowdead[MaxHeapTuplesPerPage];
 	OffsetNumber nowunused[MaxHeapTuplesPerPage];
 
+	/*
+	 * Chain candidates contains indexes of all LP_NORMAL and LP_REDIRECT
+	 * items. The first partition are the indexes of the LP_NORMAL and
+	 * LP_REDIRECT items we know to be part of a chain. The second partition
+	 * are the indexes of HOT tuples that may or may not be part of a HOT
+	 * chain. Those which are part of a HOT chain will be visited and marked
+	 * by heap_prune_chain() and the others will be processed afterward.
+	 */
+	int			nchain_members;
+	int			nchain_candidates;
+	OffsetNumber chain_candidates[MaxHeapTuplesPerPage];
+
 	/*
 	 * marked[i] is true if item i is entered in one of the above arrays.
 	 *
@@ -250,6 +262,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 	prstate.nredirected = prstate.ndead = prstate.nunused = 0;
 	memset(prstate.marked, 0, sizeof(prstate.marked));
 	prstate.ndeleted = 0;
+	prstate.nchain_members = 0;
+	prstate.nchain_candidates = 0;
 
 	/*
 	 * presult->htsv is not initialized here because all ntuple spots in the
@@ -288,17 +302,11 @@ heap_page_prune(Relation relation, Buffer buffer,
 		ItemId		itemid = PageGetItemId(page, offnum);
 		HeapTupleHeader htup;
 
+		presult->htsv[offnum] = -1;
+
 		/* Nothing to do if slot doesn't contain a tuple */
-		if (!ItemIdIsNormal(itemid))
-		{
-			presult->htsv[offnum] = -1;
+		if (!ItemIdIsUsed(itemid))
 			continue;
-		}
-
-		htup = (HeapTupleHeader) PageGetItem(page, itemid);
-		tup.t_data = htup;
-		tup.t_len = ItemIdGetLength(itemid);
-		ItemPointerSet(&(tup.t_self), blockno, offnum);
 
 		/*
 		 * Set the offset number so that we can display it along with any
@@ -307,18 +315,66 @@ heap_page_prune(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
+		if (ItemIdIsDead(itemid))
+		{
+			/*
+			 * If the caller set mark_unused_now true, we can set dead line
+			 * pointers LP_UNUSED now.
+			 */
+			if (unlikely(prstate.mark_unused_now))
+				heap_prune_record_unused(&prstate, offnum, false);
+			else
+				heap_prune_record_unchanged_lp_dead(&prstate, offnum);
+
+			continue;
+		}
+
+		if (ItemIdIsRedirected(itemid))
+		{
+			/* This is a chain member, so it goes in partition 1 */
+			OffsetNumber swap = prstate.chain_candidates[prstate.nchain_members];
+
+			prstate.chain_candidates[prstate.nchain_candidates++] = swap;
+			prstate.chain_candidates[prstate.nchain_members++] = offnum;
+
+			continue;
+		}
+
+		Assert(ItemIdIsNormal(itemid));
+
+		/*
+		 * Given that we have an LP_NORMAL item, let's get its visibility
+		 * status and then examine the tuple to decide whether to put it in
+		 * partition 1 or 2 of chain_candidates.
+		 */
+		htup = (HeapTupleHeader) PageGetItem(page, itemid);
+		tup.t_data = htup;
+		tup.t_len = ItemIdGetLength(itemid);
+		ItemPointerSet(&tup.t_self, blockno, offnum);
+
 		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
 															buffer);
+
+		if (!HeapTupleHeaderIsHeapOnly(htup))
+		{
+			/* All non-HOT tuples go in partition 1 */
+			OffsetNumber swap = prstate.chain_candidates[prstate.nchain_members];
+
+			prstate.chain_candidates[prstate.nchain_candidates++] = swap;
+			prstate.chain_candidates[prstate.nchain_members++] = offnum;
+
+			continue;
+		}
+
+		/* All LP_NORMAL HOT tuples go in partition 2 */
+		prstate.chain_candidates[prstate.nchain_candidates++] = offnum;
 	}
 
-	/* Scan the page */
-	for (offnum = FirstOffsetNumber;
-		 offnum <= maxoff;
-		 offnum = OffsetNumberNext(offnum))
+	/* Process HOT chains */
+	for (int i = 0; i < prstate.nchain_members; i++)
 	{
-		ItemId		itemid;
+		offnum = prstate.chain_candidates[i];
 
-		/* Ignore items already processed as part of an earlier chain */
 		if (prstate.marked[offnum])
 			continue;
 
@@ -326,23 +382,78 @@ heap_page_prune(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		/* Nothing to do if slot is empty */
-		itemid = PageGetItemId(page, offnum);
-		if (!ItemIdIsUsed(itemid))
-			continue;
-
 		/* Process this item or chain of items */
 		heap_prune_chain(buffer, offnum, presult->htsv, &prstate);
 	}
 
-	for (offnum = FirstOffsetNumber;
-		 offnum <= maxoff;
-		 offnum = OffsetNumberNext(offnum))
+	/*
+	 * Check all HOT tuples to see if they have already been marked by
+	 * heap_prune_chain() or if they still need to be processed. They will
+	 * either be marked for removal or marked as unchanged.
+	 */
+	for (int i = prstate.nchain_members; i < prstate.nchain_candidates; i++)
 	{
-		ItemId		itemid = PageGetItemId(page, offnum);
+		offnum = prstate.chain_candidates[i];
 
-		if (ItemIdIsUsed(itemid) && !prstate.marked[offnum])
-			heap_prune_record_unchanged(page, &prstate, offnum);
+		if (prstate.marked[offnum])
+			continue;
+
+		/* see preceding loop */
+		if (off_loc)
+			*off_loc = offnum;
+
+		if (presult->htsv[offnum] == HEAPTUPLE_DEAD)
+		{
+			ItemId		itemid = PageGetItemId(page, offnum);
+			HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+			/*
+			 * If the tuple is DEAD and doesn't chain to anything else, mark
+			 * it unused immediately.  (If it does chain, we can only remove
+			 * it as part of pruning its chain.)
+			 *
+			 * We need this primarily to handle aborted HOT updates, that is,
+			 * XMIN_INVALID heap-only tuples.  Those might not be linked to by
+			 * any chain, since the parent tuple might be re-updated before
+			 * any pruning occurs.  So we have to be able to reap them
+			 * separately from chain-pruning.  (Note that
+			 * HeapTupleHeaderIsHotUpdated will never return true for an
+			 * XMIN_INVALID tuple, so this code will work even when there were
+			 * sequential updates within the aborted transaction.)
+			 *
+			 * Note that we might first arrive at a dead heap-only tuple
+			 * either above while following a chain or here.  Whichever path
+			 * gets there first will mark the tuple unused.
+			 *
+			 * Whether we arrive at the dead HOT tuple first here or while
+			 * following a chain above affects whether preceding RECENTLY_DEAD
+			 * tuples in the chain can be removed or not.  Imagine that you
+			 * have a chain with two tuples: RECENTLY_DEAD -> DEAD.  If we
+			 * reach the RECENTLY_DEAD tuple first, the chain-following logic
+			 * will find the DEAD tuple and conclude that both tuples are in
+			 * fact dead and can be removed.  But if we reach the DEAD tuple
+			 * at the end of the chain first, when we reach the RECENTLY_DEAD
+			 * tuple later, we will not follow the chain because the DEAD
+			 * TUPLE is already 'marked', and will not remove the
+			 * RECENTLY_DEAD tuple.  This is not a correctness issue, and the
+			 * RECENTLY_DEAD tuple will be removed by a later VACUUM.
+			 */
+			if (!HeapTupleHeaderIsHotUpdated(htup))
+			{
+				HeapTupleHeaderAdvanceConflictHorizon(htup,
+													  &prstate.snapshotConflictHorizon);
+				heap_prune_record_unused(&prstate, offnum, true);
+				continue;
+			}
+		}
+
+		/*
+		 * HOT tuple is not DEAD or has been HOT-updated. If it is a DEAD,
+		 * HOT-updated member of a chain, it should have been already been
+		 * marked by heap_prune_chain() and heap_prune_record_unchanged() will
+		 * return immediately.
+		 */
+		heap_prune_record_unchanged(page, &prstate, offnum);
 	}
 
 /* We should now have processed every tuple exactly once  */
@@ -351,12 +462,10 @@ heap_page_prune(Relation relation, Buffer buffer,
 		 offnum <= maxoff;
 		 offnum = OffsetNumberNext(offnum))
 	{
-		ItemId		itemid;
-
 		if (off_loc)
 			*off_loc = offnum;
-		itemid = PageGetItemId(page, offnum);
-		if (ItemIdIsUsed(itemid))
+
+		if (ItemIdIsUsed(PageGetItemId(page, offnum)))
 			Assert(prstate.marked[offnum]);
 		else
 			Assert(!prstate.marked[offnum]);
@@ -490,11 +599,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 				 int8 *htsv, PruneState *prstate)
 {
 	Page		page = (Page) BufferGetPage(buffer);
-	TransactionId priorXmax = InvalidTransactionId;
-	ItemId		rootlp;
-	HeapTupleHeader htup;
-	OffsetNumber maxoff = PageGetMaxOffsetNumber(page),
-				offnum;
+	ItemId		rootlp = PageGetItemId(page, rootoffnum);
 	OffsetNumber chainitems[MaxHeapTuplesPerPage];
 
 	/*
@@ -504,67 +609,14 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 	int			ndeadchain = 0,
 				nchain = 0;
 
-	rootlp = PageGetItemId(page, rootoffnum);
-
-	/*
-	 * If it's a heap-only tuple, then it is not the start of a HOT chain.
-	 */
-	if (ItemIdIsNormal(rootlp))
-	{
-		Assert(htsv[rootoffnum] != -1);
-		htup = (HeapTupleHeader) PageGetItem(page, rootlp);
-
-		if (HeapTupleHeaderIsHeapOnly(htup))
-		{
-			/*
-			 * If the tuple is DEAD and doesn't chain to anything else, mark
-			 * it unused immediately.  (If it does chain, we can only remove
-			 * it as part of pruning its chain.)
-			 *
-			 * We need this primarily to handle aborted HOT updates, that is,
-			 * XMIN_INVALID heap-only tuples.  Those might not be linked to by
-			 * any chain, since the parent tuple might be re-updated before
-			 * any pruning occurs.  So we have to be able to reap them
-			 * separately from chain-pruning.  (Note that
-			 * HeapTupleHeaderIsHotUpdated will never return true for an
-			 * XMIN_INVALID tuple, so this code will work even when there were
-			 * sequential updates within the aborted transaction.)
-			 *
-			 * Note that we might first arrive at a dead heap-only tuple
-			 * either here or while following a chain below.  Whichever path
-			 * gets there first will mark the tuple unused.
-			 *
-			 * Whether we arrive at the dead HOT tuple first here or while
-			 * following a chain below affects whether preceding RECENTLY_DEAD
-			 * tuples in the chain can be removed or not.  Imagine that you
-			 * have a chain with two tuples: RECENTLY_DEAD -> DEAD.  If we
-			 * reach the RECENTLY_DEAD tuple first, the chain-following logic
-			 * will find the DEAD tuple and conclude that both tuples are in
-			 * fact dead and can be removed.  But if we reach the DEAD tuple
-			 * at the end of the chain first, when we reach the RECENTLY_DEAD
-			 * tuple later, we will not follow the chain because the DEAD
-			 * TUPLE is already 'marked', and will not remove the
-			 * RECENTLY_DEAD tuple.  This is not a correctness issue, and the
-			 * RECENTLY_DEAD tuple will be removed by a later VACUUM.
-			 */
-			if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
-				!HeapTupleHeaderIsHotUpdated(htup))
-			{
-				heap_prune_record_unused(prstate, rootoffnum, true);
-				HeapTupleHeaderAdvanceConflictHorizon(htup,
-													  &prstate->snapshotConflictHorizon);
-			}
-
-			return;
-		}
-	}
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	TransactionId priorXmax = InvalidTransactionId;
 
 	/* Start from the root tuple */
-	offnum = rootoffnum;
-
 	/* while not end of the chain */
-	for (;;)
+	for (OffsetNumber offnum = rootoffnum;;)
 	{
+		HeapTupleHeader htup;
 		ItemId		lp;
 
 		/* Sanity check (pure paranoia) */
@@ -584,8 +636,13 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 
 		lp = PageGetItemId(page, offnum);
 
-		/* Unused item obviously isn't part of the chain */
-		if (!ItemIdIsUsed(lp))
+		/*
+		 * Unused item obviously isn't part of the chain. Likewise, a dead
+		 * line pointer can't be part of the chain. (We already eliminated the
+		 * case of dead root tuple outside this function.). MFIXME should dead
+		 * item check be an assert?
+		 */
+		if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
 			break;
 
 		/*
@@ -602,27 +659,8 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 			continue;
 		}
 
-		/*
-		 * Likewise, a dead line pointer can't be part of the chain. (We
-		 * already eliminated the case of dead root tuple outside this
-		 * function.)
-		 */
-		if (ItemIdIsDead(lp))
-		{
-			/*
-			 * If the caller set mark_unused_now true, we can set dead line
-			 * pointers LP_UNUSED now. We don't increment ndeleted here since
-			 * the LP was already marked dead.
-			 */
-			if (unlikely(prstate->mark_unused_now))
-				heap_prune_record_unused(prstate, offnum, false);
-			else
-				heap_prune_record_unchanged_lp_dead(prstate, offnum);
-
-			break;
-		}
-
 		Assert(ItemIdIsNormal(lp));
+
 		htup = (HeapTupleHeader) PageGetItem(page, lp);
 
 		/*
@@ -748,7 +786,7 @@ process_chains:
 
 		/* the rest of tuples in the chain are normal, unchanged tuples */
 		for (; i < nchain; i++)
-			heap_prune_record_unchanged(dp, prstate, chainitems[i]);
+			heap_prune_record_unchanged(page, prstate, chainitems[i]);
 	}
 	else if (ndeadchain == nchain)
 	{
@@ -780,7 +818,7 @@ process_chains:
 
 		/* the rest of tuples in the chain are normal, unchanged tuples */
 		for (int i = ndeadchain; i < nchain; i++)
-			heap_prune_record_unchanged(dp, prstate, chainitems[i]);
+			heap_prune_record_unchanged(page, prstate, chainitems[i]);
 	}
 }
 
-- 
2.40.1

