From e642b2fa85fd21626fa6830e769867ce8aafe54e Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 22 Jan 2024 17:25:56 -0500
Subject: [PATCH v1 14/15] Count tuples for vacuum logging in heap_page_prune

lazy_scan_prune() loops through all of the tuple visibility information
that was recorded in heap_page_prune() and then counts live and recently
dead tuples. That information is available in heap_page_prune(), so just
record it there. Add live and recently dead tuple counters to the
PruneResult. Doing this counting in heap_page_prune() eliminates the
need for saving the tuple visibility status information in the
PruneResult. Instead, save it in the PruneState where it can be
referenced by heap_prune_chain().
---
 src/backend/access/heap/pruneheap.c  | 109 +++++++++++++++++++++++----
 src/backend/access/heap/vacuumlazy.c |  75 +-----------------
 src/include/access/heapam.h          |  27 +------
 3 files changed, 97 insertions(+), 114 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 157ee4dc170..88adad99c39 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -56,6 +56,17 @@ typedef struct
 	 * 1. Otherwise every access would need to subtract 1.
 	 */
 	bool		marked[MaxHeapTuplesPerPage + 1];
+
+	/*
+	 * Tuple visibility is only computed once for each tuple, for correctness
+	 * and efficiency reasons; see comment in heap_page_prune() for details.
+	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
+	 *
+	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+	 * 1. Otherwise every access would need to subtract 1.
+	 */
+	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -66,7 +77,8 @@ static int	heap_prune_chain(Buffer buffer,
 							 OffsetNumber rootoffnum,
 							 PruneState *prstate, PruneResult *presult);
 
-static void prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
+static inline HTSV_Result htsv_get_valid_status(int status);
+static void prune_prepare_freeze_tuple(Page page, OffsetNumber offnum, PruneState *prstate,
 									   HeapPageFreeze *pagefrz, HeapTupleFreeze *frozen,
 									   PruneResult *presult);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
@@ -278,6 +290,9 @@ heap_page_prune(Relation relation, Buffer buffer,
 
 	presult->hastup = false;
 
+	presult->live_tuples = 0;
+	presult->recently_dead_tuples = 0;
+
 	/*
 	 * Keep track of whether or not the page is all_visible in case the caller
 	 * wants to use this information to update the VM.
@@ -319,7 +334,7 @@ heap_page_prune(Relation relation, Buffer buffer,
 		/* Nothing to do if slot doesn't contain a tuple */
 		if (!ItemIdIsNormal(itemid))
 		{
-			presult->htsv[offnum] = -1;
+			prstate.htsv[offnum] = -1;
 			continue;
 		}
 
@@ -335,9 +350,30 @@ heap_page_prune(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-															buffer);
-		switch (presult->htsv[offnum])
+		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+														   buffer);
+		Assert(ItemIdIsNormal(itemid));
+
+		/*
+		 * The criteria for counting a tuple as live in this block need to
+		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
+		 * and ANALYZE may produce wildly different reltuples values, e.g.
+		 * when there are many recently-dead tuples.
+		 *
+		 * The logic here is a bit simpler than acquire_sample_rows(), as
+		 * VACUUM can't run inside a transaction block, which makes some cases
+		 * impossible (e.g. in-progress insert from the same transaction).
+		 *
+		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
+		 * that might be seen here) differently, too: we assume that they'll
+		 * become LP_UNUSED before VACUUM finishes.  This difference is only
+		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
+		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
+		 * because they're not supposed to be left behind when it is done.
+		 * (Cases where we bypass index vacuuming will violate this optimistic
+		 * assumption, but the overall impact of that should be negligible.)
+		 */
+		switch (prstate.htsv[offnum])
 		{
 			case HEAPTUPLE_DEAD:
 
@@ -357,6 +393,12 @@ heap_page_prune(Relation relation, Buffer buffer,
 				break;
 			case HEAPTUPLE_LIVE:
 
+				/*
+				 * Count it as live.  Not only is this natural, but it's also
+				 * what acquire_sample_rows() does.
+				 */
+				presult->live_tuples++;
+
 				/*
 				 * Is the tuple definitely visible to all transactions?
 				 *
@@ -393,13 +435,34 @@ heap_page_prune(Relation relation, Buffer buffer,
 				}
 				break;
 			case HEAPTUPLE_RECENTLY_DEAD:
+
+				/*
+				 * If tuple is recently dead then we must not remove it from
+				 * the relation.  (We only remove items that are LP_DEAD from
+				 * pruning.)
+				 */
+				presult->recently_dead_tuples++;
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+				/*
+				 * We do not count these rows as live, because we expect the
+				 * inserting transaction to update the counters at commit, and
+				 * we assume that will happen only after we report our
+				 * results.  This assumption is a bit shaky, but it is what
+				 * acquire_sample_rows() does, so be consistent.
+				 */
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
-				/* This is an expected case during concurrent vacuum */
+
+				/*
+				 * This an expected case during concurrent vacuum. Count such
+				 * rows as live.  As above, we assume the deleting transaction
+				 * will commit and update the counters after we report.
+				 */
+				presult->live_tuples++;
 				presult->all_visible = false;
 				break;
 			default:
@@ -451,15 +514,15 @@ heap_page_prune(Relation relation, Buffer buffer,
 			*off_loc = offnum;
 
 		if (pagefrz)
-			prune_prepare_freeze_tuple(page, offnum,
+			prune_prepare_freeze_tuple(page, offnum, &prstate,
 									   pagefrz, frozen, presult);
 
 		itemid = PageGetItemId(page, offnum);
 
 		if (ItemIdIsNormal(itemid) &&
-			presult->htsv[offnum] != HEAPTUPLE_DEAD)
+			prstate.htsv[offnum] != HEAPTUPLE_DEAD)
 		{
-			Assert(presult->htsv[offnum] != -1);
+			Assert(prstate.htsv[offnum] != -1);
 
 			/*
 			 * Deliberately don't set hastup for LP_DEAD items.  We make the
@@ -723,10 +786,24 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 }
 
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneState.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+	Assert(status >= HEAPTUPLE_DEAD &&
+		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+	return (HTSV_Result) status;
+}
+
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
- * Tuple visibility information is provided in presult->htsv.
+ * Tuple visibility information is provided in prstate->htsv.
  *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
@@ -777,7 +854,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 	 */
 	if (ItemIdIsNormal(rootlp))
 	{
-		Assert(presult->htsv[rootoffnum] != -1);
+		Assert(prstate->htsv[rootoffnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
 		if (HeapTupleHeaderIsHeapOnly(htup))
@@ -800,7 +877,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 			 * either here or while following a chain below.  Whichever path
 			 * gets there first will mark the tuple unused.
 			 */
-			if (presult->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+			if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
 				!HeapTupleHeaderIsHotUpdated(htup))
 			{
 				heap_prune_record_unused(prstate, rootoffnum);
@@ -901,7 +978,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 		 */
 		tupdead = recent_dead = false;
 
-		switch (htsv_get_valid_status(presult->htsv[offnum]))
+		switch (htsv_get_valid_status(prstate->htsv[offnum]))
 		{
 			case HEAPTUPLE_DEAD:
 				tupdead = true;
@@ -1039,7 +1116,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
  * want to consider freezing normal tuples which will not be removed.
 */
 static void
-prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
+prune_prepare_freeze_tuple(Page page, OffsetNumber offnum, PruneState *prstate,
 						   HeapPageFreeze *pagefrz,
 						   HeapTupleFreeze *frozen,
 						   PruneResult *presult)
@@ -1056,8 +1133,8 @@ prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
 		return;
 
 	/* We do not consider freezing tuples which will be removed. */
-	if (presult->htsv[offnum] == HEAPTUPLE_DEAD ||
-		presult->htsv[offnum] == -1)
+	if (prstate->htsv[offnum] == HEAPTUPLE_DEAD ||
+		prstate->htsv[offnum] == -1)
 		return;
 
 	htup = (HeapTupleHeader) PageGetItem(page, itemid);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index eb415a0aa6f..4770fcea021 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1413,9 +1413,6 @@ lazy_scan_prune(LVRelState *vacrel,
 				maxoff;
 	ItemId		itemid;
 	PruneResult presult;
-	int			lpdead_items,
-				live_tuples,
-				recently_dead_tuples;
 	HeapPageFreeze pagefrz;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 
@@ -1436,8 +1433,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
 	pagefrz.cutoffs = &vacrel->cutoffs;
 	lpdead_items = 0;
-	live_tuples = 0;
-	recently_dead_tuples = 0;
 
 	/*
 	 * Prune all HOT-update chains in this page.
@@ -1472,9 +1467,6 @@ lazy_scan_prune(LVRelState *vacrel,
 		vacrel->offnum = offnum;
 		itemid = PageGetItemId(page, offnum);
 
-		/* Redirect items mustn't be touched */
-		if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid))
-			continue;
 
 		if (ItemIdIsDead(itemid))
 		{
@@ -1482,69 +1474,6 @@ lazy_scan_prune(LVRelState *vacrel,
 			continue;
 		}
 
-		Assert(ItemIdIsNormal(itemid));
-
-		/*
-		 * The criteria for counting a tuple as live in this block need to
-		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
-		 * and ANALYZE may produce wildly different reltuples values, e.g.
-		 * when there are many recently-dead tuples.
-		 *
-		 * The logic here is a bit simpler than acquire_sample_rows(), as
-		 * VACUUM can't run inside a transaction block, which makes some cases
-		 * impossible (e.g. in-progress insert from the same transaction).
-		 *
-		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
-		 * that might be seen here) differently, too: we assume that they'll
-		 * become LP_UNUSED before VACUUM finishes.  This difference is only
-		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
-		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
-		 * because they're not supposed to be left behind when it is done.
-		 * (Cases where we bypass index vacuuming will violate this optimistic
-		 * assumption, but the overall impact of that should be negligible.)
-		 */
-		switch (htsv_get_valid_status(presult.htsv[offnum]))
-		{
-			case HEAPTUPLE_LIVE:
-
-				/*
-				 * Count it as live.  Not only is this natural, but it's also
-				 * what acquire_sample_rows() does.
-				 */
-				live_tuples++;
-				break;
-			case HEAPTUPLE_RECENTLY_DEAD:
-
-				/*
-				 * If tuple is recently dead then we must not remove it from
-				 * the relation.  (We only remove items that are LP_DEAD from
-				 * pruning.)
-				 */
-				recently_dead_tuples++;
-				break;
-			case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-				/*
-				 * We do not count these rows as live, because we expect the
-				 * inserting transaction to update the counters at commit, and
-				 * we assume that will happen only after we report our
-				 * results.  This assumption is a bit shaky, but it is what
-				 * acquire_sample_rows() does, so be consistent.
-				 */
-				break;
-			case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-				/*
-				 * This an expected case during concurrent vacuum. Count such
-				 * rows as live.  As above, we assume the deleting transaction
-				 * will commit and update the counters after we report.
-				 */
-				live_tuples++;
-				break;
-			default:
-				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-				break;
-		}
 	}
 
 	vacrel->offnum = InvalidOffsetNumber;
@@ -1634,8 +1563,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	vacrel->tuples_deleted += presult.ndeleted;
 	vacrel->tuples_frozen += presult.nfrozen;
 	vacrel->lpdead_items += lpdead_items;
-	vacrel->live_tuples += live_tuples;
-	vacrel->recently_dead_tuples += recently_dead_tuples;
+	vacrel->live_tuples += presult.live_tuples;
+	vacrel->recently_dead_tuples += presult.recently_dead_tuples;
 
 	/* Can't truncate this page */
 	if (presult.hastup)
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4e41cf68957..989515a628d 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -198,23 +198,14 @@ typedef struct HeapPageFreeze
  */
 typedef struct PruneResult
 {
+	int			live_tuples;
+	int			recently_dead_tuples;
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
 	bool		all_visible;	/* Whether or not the page is all visible */
 	bool		hastup;			/* Does page make rel truncation unsafe */
 	TransactionId frz_conflict_horizon; /* Newest xmin on the page */
 
-	/*
-	 * Tuple visibility is only computed once for each tuple, for correctness
-	 * and efficiency reasons; see comment in heap_page_prune() for details.
-	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
-	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
-	 *
-	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
-	 * 1. Otherwise every access would need to subtract 1.
-	 */
-	int8		htsv[MaxHeapTuplesPerPage + 1];
-
 	bool		all_visible_except_removable;
 
 	/* Whether or not the page can be set all frozen in the VM */
@@ -224,20 +215,6 @@ typedef struct PruneResult
 	int			nfrozen;
 } PruneResult;
 
-/*
- * Pruning calculates tuple visibility once and saves the results in an array
- * of int8. See PruneResult.htsv for details. This helper function is meant to
- * guard against examining visibility status array members which have not yet
- * been computed.
- */
-static inline HTSV_Result
-htsv_get_valid_status(int status)
-{
-	Assert(status >= HEAPTUPLE_DEAD &&
-		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
-	return (HTSV_Result) status;
-}
-
 /* ----------------
  *		function prototypes for heap access method
  *
-- 
2.37.2

