From f9356564f651fa9f91e99adbbac50914601e3551 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 22 Jan 2024 17:25:56 -0500
Subject: [PATCH v4 16/19] Count tuples for vacuum logging in heap_page_prune

lazy_scan_prune() loops through all of the tuple visibility information
that was recorded in heap_page_prune() and then counts live and recently
dead tuples. That information is available in heap_page_prune(), so just
record it there. Add live and recently dead tuple counters to the
PruneResult. Doing this counting in heap_page_prune() eliminates the
need for saving the tuple visibility status information in the
PruneResult. Instead, save it in the PruneState where it can be
referenced by heap_prune_chain().
---
 src/backend/access/heap/pruneheap.c  | 99 ++++++++++++++++++++++++----
 src/backend/access/heap/vacuumlazy.c | 93 +-------------------------
 src/include/access/heapam.h          | 29 +-------
 3 files changed, 93 insertions(+), 128 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index d183912a402..f59b03222b0 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -55,6 +55,18 @@ typedef struct
 	 */
 	bool		marked[MaxHeapTuplesPerPage + 1];
 
+	/*
+	 * Tuple visibility is only computed once for each tuple, for correctness
+	 * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
+	 * details. This is of type int8[], instead of HTSV_Result[], so we can
+	 * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
+	 * items.
+	 *
+	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+	 * 1. Otherwise every access would need to subtract 1.
+	 */
+	int8		htsv[MaxHeapTuplesPerPage + 1];
+
 	/*
 	 * One entry for every tuple that we may freeze.
 	 */
@@ -69,6 +81,7 @@ static int	heap_prune_chain(Buffer buffer,
 							 OffsetNumber rootoffnum,
 							 PruneState *prstate, PruneFreezeResult *presult);
 
+static inline HTSV_Result htsv_get_valid_status(int status);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
 									   OffsetNumber offnum, OffsetNumber rdoffnum,
@@ -269,7 +282,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	prstate.nredirected = prstate.ndead = prstate.nunused = 0;
 
 	/*
-	 * presult->htsv is not initialized here because all ntuple spots in the
+	 * prstate.htsv is not initialized here because all ntuple spots in the
 	 * array will be set either to a valid HTSV_Result value or -1.
 	 */
 	memset(prstate.marked, 0, sizeof(prstate.marked));
@@ -280,6 +293,9 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 
 	presult->hastup = false;
 
+	presult->live_tuples = 0;
+	presult->recently_dead_tuples = 0;
+
 	/*
 	 * Caller will update the VM after pruning, collecting LP_DEAD items, and
 	 * freezing tuples. Keep track of whether or not the page is all_visible
@@ -329,7 +345,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		/* Nothing to do if slot doesn't contain a tuple */
 		if (!ItemIdIsNormal(itemid))
 		{
-			presult->htsv[offnum] = -1;
+			prstate.htsv[offnum] = -1;
 			continue;
 		}
 
@@ -345,9 +361,29 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-															buffer);
-		switch (presult->htsv[offnum])
+		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+														   buffer);
+
+		/*
+		 * The criteria for counting a tuple as live in this block need to
+		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
+		 * and ANALYZE may produce wildly different reltuples values, e.g.
+		 * when there are many recently-dead tuples.
+		 *
+		 * The logic here is a bit simpler than acquire_sample_rows(), as
+		 * VACUUM can't run inside a transaction block, which makes some cases
+		 * impossible (e.g. in-progress insert from the same transaction).
+		 *
+		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
+		 * that might be seen here) differently, too: we assume that they'll
+		 * become LP_UNUSED before VACUUM finishes.  This difference is only
+		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
+		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
+		 * because they're not supposed to be left behind when it is done.
+		 * (Cases where we bypass index vacuuming will violate this optimistic
+		 * assumption, but the overall impact of that should be negligible.)
+		 */
+		switch (prstate.htsv[offnum])
 		{
 			case HEAPTUPLE_DEAD:
 
@@ -367,6 +403,12 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				break;
 			case HEAPTUPLE_LIVE:
 
+				/*
+				 * Count it as live.  Not only is this natural, but it's also
+				 * what acquire_sample_rows() does.
+				 */
+				presult->live_tuples++;
+
 				/*
 				 * Is the tuple definitely visible to all transactions?
 				 *
@@ -408,13 +450,34 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				}
 				break;
 			case HEAPTUPLE_RECENTLY_DEAD:
+
+				/*
+				 * If tuple is recently dead then we must not remove it from
+				 * the relation.  (We only remove items that are LP_DEAD from
+				 * pruning.)
+				 */
+				presult->recently_dead_tuples++;
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+				/*
+				 * We do not count these rows as live, because we expect the
+				 * inserting transaction to update the counters at commit, and
+				 * we assume that will happen only after we report our
+				 * results.  This assumption is a bit shaky, but it is what
+				 * acquire_sample_rows() does, so be consistent.
+				 */
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
-				/* This is an expected case during concurrent vacuum */
+
+				/*
+				 * This an expected case during concurrent vacuum. Count such
+				 * rows as live.  As above, we assume the deleting transaction
+				 * will commit and update the counters after we report.
+				 */
+				presult->live_tuples++;
 				presult->all_visible = false;
 				break;
 			default:
@@ -422,7 +485,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				break;
 		}
 
-		if (presult->htsv[offnum] != HEAPTUPLE_DEAD)
+		if (prstate.htsv[offnum] != HEAPTUPLE_DEAD)
 		{
 			/*
 			 * Deliberately don't set hastup for LP_DEAD items.  We make the
@@ -772,10 +835,24 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 }
 
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneState.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+	Assert(status >= HEAPTUPLE_DEAD &&
+		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+	return (HTSV_Result) status;
+}
+
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
- * Tuple visibility information is provided in presult->htsv.
+ * Tuple visibility information is provided in prstate->htsv.
  *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
@@ -826,7 +903,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 	 */
 	if (ItemIdIsNormal(rootlp))
 	{
-		Assert(presult->htsv[rootoffnum] != -1);
+		Assert(prstate->htsv[rootoffnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
 		if (HeapTupleHeaderIsHeapOnly(htup))
@@ -849,7 +926,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 			 * either here or while following a chain below.  Whichever path
 			 * gets there first will mark the tuple unused.
 			 */
-			if (presult->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+			if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
 				!HeapTupleHeaderIsHotUpdated(htup))
 			{
 				heap_prune_record_unused(prstate, rootoffnum);
@@ -950,7 +1027,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 		 */
 		tupdead = recent_dead = false;
 
-		switch (htsv_get_valid_status(presult->htsv[offnum]))
+		switch (htsv_get_valid_status(prstate->htsv[offnum]))
 		{
 			case HEAPTUPLE_DEAD:
 				tupdead = true;
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 370721a619a..a3c971cd26d 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1378,22 +1378,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
  *
  * Caller must hold pin and buffer cleanup lock on the buffer.
  *
- * Prior to PostgreSQL 14 there were very rare cases where
- * heap_page_prune_and_freeze() was allowed to disagree with our
- * HeapTupleSatisfiesVacuum() call about whether or not a tuple should be
- * considered DEAD.  This happened when an inserting transaction concurrently
- * aborted (after our heap_page_prune_and_freeze() call, before our
- * HeapTupleSatisfiesVacuum() call).  There was rather a lot of complexity just
- * so we could deal with tuples that were DEAD to VACUUM, but nevertheless were
- * left with storage after pruning.
- *
- * As of Postgres 17, we circumvent this problem altogether by reusing the
- * result of heap_page_prune_and_freeze()'s visibility check. Without the
- * second call to HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and
- * there can be no disagreement. We'll just handle such tuples as if they had
- * become fully dead right after this operation completes instead of in the
- * middle of it.
- *
  * vmbuffer is the buffer containing the VM block with visibility information
  * for the heap block, blkno. all_visible_according_to_vm is the saved
  * visibility status of the heap block looked up earlier by the caller. We
@@ -1415,10 +1399,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	OffsetNumber offnum,
 				maxoff;
 	ItemId		itemid;
+	int			lpdead_items = 0;
 	PruneFreezeResult presult;
-	int			lpdead_items,
-				live_tuples,
-				recently_dead_tuples;
 	HeapPageFreeze pagefrz;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 
@@ -1438,9 +1420,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
 	pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
 	pagefrz.cutoffs = &vacrel->cutoffs;
-	lpdead_items = 0;
-	live_tuples = 0;
-	recently_dead_tuples = 0;
 
 	/*
 	 * Prune all HOT-update chains and potentially freeze tuples on this page.
@@ -1476,9 +1455,6 @@ lazy_scan_prune(LVRelState *vacrel,
 		vacrel->offnum = offnum;
 		itemid = PageGetItemId(page, offnum);
 
-		/* Redirect items mustn't be touched */
-		if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid))
-			continue;
 
 		if (ItemIdIsDead(itemid))
 		{
@@ -1486,69 +1462,6 @@ lazy_scan_prune(LVRelState *vacrel,
 			continue;
 		}
 
-		Assert(ItemIdIsNormal(itemid));
-
-		/*
-		 * The criteria for counting a tuple as live in this block need to
-		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
-		 * and ANALYZE may produce wildly different reltuples values, e.g.
-		 * when there are many recently-dead tuples.
-		 *
-		 * The logic here is a bit simpler than acquire_sample_rows(), as
-		 * VACUUM can't run inside a transaction block, which makes some cases
-		 * impossible (e.g. in-progress insert from the same transaction).
-		 *
-		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
-		 * that might be seen here) differently, too: we assume that they'll
-		 * become LP_UNUSED before VACUUM finishes.  This difference is only
-		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
-		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
-		 * because they're not supposed to be left behind when it is done.
-		 * (Cases where we bypass index vacuuming will violate this optimistic
-		 * assumption, but the overall impact of that should be negligible.)
-		 */
-		switch (htsv_get_valid_status(presult.htsv[offnum]))
-		{
-			case HEAPTUPLE_LIVE:
-
-				/*
-				 * Count it as live.  Not only is this natural, but it's also
-				 * what acquire_sample_rows() does.
-				 */
-				live_tuples++;
-				break;
-			case HEAPTUPLE_RECENTLY_DEAD:
-
-				/*
-				 * If tuple is recently dead then we must not remove it from
-				 * the relation.  (We only remove items that are LP_DEAD from
-				 * pruning.)
-				 */
-				recently_dead_tuples++;
-				break;
-			case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-				/*
-				 * We do not count these rows as live, because we expect the
-				 * inserting transaction to update the counters at commit, and
-				 * we assume that will happen only after we report our
-				 * results.  This assumption is a bit shaky, but it is what
-				 * acquire_sample_rows() does, so be consistent.
-				 */
-				break;
-			case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-				/*
-				 * This an expected case during concurrent vacuum. Count such
-				 * rows as live.  As above, we assume the deleting transaction
-				 * will commit and update the counters after we report.
-				 */
-				live_tuples++;
-				break;
-			default:
-				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-				break;
-		}
 	}
 
 	vacrel->offnum = InvalidOffsetNumber;
@@ -1626,8 +1539,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	vacrel->tuples_deleted += presult.ndeleted;
 	vacrel->tuples_frozen += presult.nfrozen;
 	vacrel->lpdead_items += lpdead_items;
-	vacrel->live_tuples += live_tuples;
-	vacrel->recently_dead_tuples += recently_dead_tuples;
+	vacrel->live_tuples += presult.live_tuples;
+	vacrel->recently_dead_tuples += presult.recently_dead_tuples;
 
 	/* Can't truncate this page */
 	if (presult.hastup)
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 25dbae8139e..23ba23b5b01 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -198,6 +198,8 @@ typedef struct HeapPageFreeze
  */
 typedef struct PruneFreezeResult
 {
+	int			live_tuples;
+	int			recently_dead_tuples;
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
 	bool		all_visible;	/* Whether or not the page is all visible */
@@ -211,19 +213,6 @@ typedef struct PruneFreezeResult
 	int			nfrozen;
 	TransactionId frz_conflict_horizon; /* Newest xmin on the page */
 
-	/*
-	 * Tuple visibility is only computed once for each tuple, for correctness
-	 * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
-	 * details. This is of type int8[], instead of HTSV_Result[], so we can
-	 * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
-	 * items.
-	 *
-	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
-	 * 1. Otherwise every access would need to subtract 1.
-	 */
-	int8		htsv[MaxHeapTuplesPerPage + 1];
-
-
 	/* New value of relfrozenxid found by heap_page_prune_and_freeze() */
 	TransactionId new_relfrozenxid;
 
@@ -231,20 +220,6 @@ typedef struct PruneFreezeResult
 	MultiXactId new_relminmxid;
 } PruneFreezeResult;
 
-/*
- * Pruning calculates tuple visibility once and saves the results in an array
- * of int8. See PruneFreezeResult.htsv for details. This helper function is
- * meant to guard against examining visibility status array members which have
- * not yet been computed.
- */
-static inline HTSV_Result
-htsv_get_valid_status(int status)
-{
-	Assert(status >= HEAPTUPLE_DEAD &&
-		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
-	return (HTSV_Result) status;
-}
-
 /* ----------------
  *		function prototypes for heap access method
  *
-- 
2.40.1

