From 373a2eb58914d9426045da264885a8440df35356 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 22 Jan 2024 17:25:56 -0500
Subject: [PATCH v2 14/17] Count tuples for vacuum logging in heap_page_prune

lazy_scan_prune() loops through all of the tuple visibility information
that was recorded in heap_page_prune() and then counts live and recently
dead tuples. That information is available in heap_page_prune(), so just
record it there. Add live and recently dead tuple counters to the
PruneResult. Doing this counting in heap_page_prune() eliminates the
need for saving the tuple visibility status information in the
PruneResult. Instead, save it in the PruneState where it can be
referenced by heap_prune_chain().
---
 src/backend/access/heap/pruneheap.c  | 110 +++++++++++++++++++++++----
 src/backend/access/heap/vacuumlazy.c |  77 +------------------
 src/include/access/heapam.h          |  28 +------
 3 files changed, 99 insertions(+), 116 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index faaab375651..46b5173a401 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -55,6 +55,18 @@ typedef struct
 	 * 1. Otherwise every access would need to subtract 1.
 	 */
 	bool		marked[MaxHeapTuplesPerPage + 1];
+
+	/*
+	 * Tuple visibility is only computed once for each tuple, for correctness
+	 * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
+	 * details. This is of type int8[], instead of HTSV_Result[], so we can
+	 * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
+	 * items.
+	 *
+	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+	 * 1. Otherwise every access would need to subtract 1.
+	 */
+	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -65,7 +77,8 @@ static int	heap_prune_chain(Buffer buffer,
 							 OffsetNumber rootoffnum,
 							 PruneState *prstate, PruneFreezeResult *presult);
 
-static void prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
+static inline HTSV_Result htsv_get_valid_status(int status);
+static void prune_prepare_freeze_tuple(Page page, OffsetNumber offnum, PruneState *prstate,
 									   HeapPageFreeze *pagefrz, HeapTupleFreeze *frozen,
 									   PruneFreezeResult *presult);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
@@ -283,6 +296,9 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 
 	presult->hastup = false;
 
+	presult->live_tuples = 0;
+	presult->recently_dead_tuples = 0;
+
 	/*
 	 * Keep track of whether or not the page is all_visible in case the caller
 	 * wants to use this information to update the VM.
@@ -328,7 +344,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		/* Nothing to do if slot doesn't contain a tuple */
 		if (!ItemIdIsNormal(itemid))
 		{
-			presult->htsv[offnum] = -1;
+			prstate.htsv[offnum] = -1;
 			continue;
 		}
 
@@ -344,9 +360,30 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-															buffer);
-		switch (presult->htsv[offnum])
+		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+														   buffer);
+		Assert(ItemIdIsNormal(itemid));
+
+		/*
+		 * The criteria for counting a tuple as live in this block need to
+		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
+		 * and ANALYZE may produce wildly different reltuples values, e.g.
+		 * when there are many recently-dead tuples.
+		 *
+		 * The logic here is a bit simpler than acquire_sample_rows(), as
+		 * VACUUM can't run inside a transaction block, which makes some cases
+		 * impossible (e.g. in-progress insert from the same transaction).
+		 *
+		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
+		 * that might be seen here) differently, too: we assume that they'll
+		 * become LP_UNUSED before VACUUM finishes.  This difference is only
+		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
+		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
+		 * because they're not supposed to be left behind when it is done.
+		 * (Cases where we bypass index vacuuming will violate this optimistic
+		 * assumption, but the overall impact of that should be negligible.)
+		 */
+		switch (prstate.htsv[offnum])
 		{
 			case HEAPTUPLE_DEAD:
 
@@ -366,6 +403,12 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				break;
 			case HEAPTUPLE_LIVE:
 
+				/*
+				 * Count it as live.  Not only is this natural, but it's also
+				 * what acquire_sample_rows() does.
+				 */
+				presult->live_tuples++;
+
 				/*
 				 * Is the tuple definitely visible to all transactions?
 				 *
@@ -402,13 +445,34 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				}
 				break;
 			case HEAPTUPLE_RECENTLY_DEAD:
+
+				/*
+				 * If tuple is recently dead then we must not remove it from
+				 * the relation.  (We only remove items that are LP_DEAD from
+				 * pruning.)
+				 */
+				presult->recently_dead_tuples++;
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+				/*
+				 * We do not count these rows as live, because we expect the
+				 * inserting transaction to update the counters at commit, and
+				 * we assume that will happen only after we report our
+				 * results.  This assumption is a bit shaky, but it is what
+				 * acquire_sample_rows() does, so be consistent.
+				 */
 				presult->all_visible = false;
 				break;
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
-				/* This is an expected case during concurrent vacuum */
+
+				/*
+				 * This an expected case during concurrent vacuum. Count such
+				 * rows as live.  As above, we assume the deleting transaction
+				 * will commit and update the counters after we report.
+				 */
+				presult->live_tuples++;
 				presult->all_visible = false;
 				break;
 			default:
@@ -460,15 +524,15 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 			*off_loc = offnum;
 
 		if (pagefrz)
-			prune_prepare_freeze_tuple(page, offnum,
+			prune_prepare_freeze_tuple(page, offnum, &prstate,
 									   pagefrz, frozen, presult);
 
 		itemid = PageGetItemId(page, offnum);
 
 		if (ItemIdIsNormal(itemid) &&
-			presult->htsv[offnum] != HEAPTUPLE_DEAD)
+			prstate.htsv[offnum] != HEAPTUPLE_DEAD)
 		{
-			Assert(presult->htsv[offnum] != -1);
+			Assert(prstate.htsv[offnum] != -1);
 
 			/*
 			 * Deliberately don't set hastup for LP_DEAD items.  We make the
@@ -756,10 +820,24 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 }
 
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneState.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+	Assert(status >= HEAPTUPLE_DEAD &&
+		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+	return (HTSV_Result) status;
+}
+
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
- * Tuple visibility information is provided in presult->htsv.
+ * Tuple visibility information is provided in prstate->htsv.
  *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
@@ -810,7 +888,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 	 */
 	if (ItemIdIsNormal(rootlp))
 	{
-		Assert(presult->htsv[rootoffnum] != -1);
+		Assert(prstate->htsv[rootoffnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
 		if (HeapTupleHeaderIsHeapOnly(htup))
@@ -833,7 +911,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 			 * either here or while following a chain below.  Whichever path
 			 * gets there first will mark the tuple unused.
 			 */
-			if (presult->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+			if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
 				!HeapTupleHeaderIsHotUpdated(htup))
 			{
 				heap_prune_record_unused(prstate, rootoffnum);
@@ -934,7 +1012,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
 		 */
 		tupdead = recent_dead = false;
 
-		switch (htsv_get_valid_status(presult->htsv[offnum]))
+		switch (htsv_get_valid_status(prstate->htsv[offnum]))
 		{
 			case HEAPTUPLE_DEAD:
 				tupdead = true;
@@ -1072,7 +1150,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
  * want to consider freezing normal tuples which will not be removed.
 */
 static void
-prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
+prune_prepare_freeze_tuple(Page page, OffsetNumber offnum, PruneState *prstate,
 						   HeapPageFreeze *pagefrz,
 						   HeapTupleFreeze *frozen,
 						   PruneFreezeResult *presult)
@@ -1089,8 +1167,8 @@ prune_prepare_freeze_tuple(Page page, OffsetNumber offnum,
 		return;
 
 	/* We do not consider freezing tuples which will be removed. */
-	if (presult->htsv[offnum] == HEAPTUPLE_DEAD ||
-		presult->htsv[offnum] == -1)
+	if (prstate->htsv[offnum] == HEAPTUPLE_DEAD ||
+		prstate->htsv[offnum] == -1)
 		return;
 
 	htup = (HeapTupleHeader) PageGetItem(page, itemid);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index aac38f54c0a..634f4da9a17 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1442,10 +1442,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	OffsetNumber offnum,
 				maxoff;
 	ItemId		itemid;
+	int			lpdead_items = 0;
 	PruneFreezeResult presult;
-	int			lpdead_items,
-				live_tuples,
-				recently_dead_tuples;
 	HeapPageFreeze pagefrz;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 
@@ -1465,9 +1463,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
 	pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
 	pagefrz.cutoffs = &vacrel->cutoffs;
-	lpdead_items = 0;
-	live_tuples = 0;
-	recently_dead_tuples = 0;
 
 	/*
 	 * Prune all HOT-update chains and potentially freeze tuples on this page.
@@ -1502,9 +1497,6 @@ lazy_scan_prune(LVRelState *vacrel,
 		vacrel->offnum = offnum;
 		itemid = PageGetItemId(page, offnum);
 
-		/* Redirect items mustn't be touched */
-		if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid))
-			continue;
 
 		if (ItemIdIsDead(itemid))
 		{
@@ -1512,69 +1504,6 @@ lazy_scan_prune(LVRelState *vacrel,
 			continue;
 		}
 
-		Assert(ItemIdIsNormal(itemid));
-
-		/*
-		 * The criteria for counting a tuple as live in this block need to
-		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
-		 * and ANALYZE may produce wildly different reltuples values, e.g.
-		 * when there are many recently-dead tuples.
-		 *
-		 * The logic here is a bit simpler than acquire_sample_rows(), as
-		 * VACUUM can't run inside a transaction block, which makes some cases
-		 * impossible (e.g. in-progress insert from the same transaction).
-		 *
-		 * We treat LP_DEAD items (which are the closest thing to DEAD tuples
-		 * that might be seen here) differently, too: we assume that they'll
-		 * become LP_UNUSED before VACUUM finishes.  This difference is only
-		 * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
-		 * items, in the end.  VACUUM won't remember LP_DEAD items, but only
-		 * because they're not supposed to be left behind when it is done.
-		 * (Cases where we bypass index vacuuming will violate this optimistic
-		 * assumption, but the overall impact of that should be negligible.)
-		 */
-		switch (htsv_get_valid_status(presult.htsv[offnum]))
-		{
-			case HEAPTUPLE_LIVE:
-
-				/*
-				 * Count it as live.  Not only is this natural, but it's also
-				 * what acquire_sample_rows() does.
-				 */
-				live_tuples++;
-				break;
-			case HEAPTUPLE_RECENTLY_DEAD:
-
-				/*
-				 * If tuple is recently dead then we must not remove it from
-				 * the relation.  (We only remove items that are LP_DEAD from
-				 * pruning.)
-				 */
-				recently_dead_tuples++;
-				break;
-			case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-				/*
-				 * We do not count these rows as live, because we expect the
-				 * inserting transaction to update the counters at commit, and
-				 * we assume that will happen only after we report our
-				 * results.  This assumption is a bit shaky, but it is what
-				 * acquire_sample_rows() does, so be consistent.
-				 */
-				break;
-			case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-				/*
-				 * This an expected case during concurrent vacuum. Count such
-				 * rows as live.  As above, we assume the deleting transaction
-				 * will commit and update the counters after we report.
-				 */
-				live_tuples++;
-				break;
-			default:
-				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-				break;
-		}
 	}
 
 	vacrel->offnum = InvalidOffsetNumber;
@@ -1652,8 +1581,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	vacrel->tuples_deleted += presult.ndeleted;
 	vacrel->tuples_frozen += presult.nfrozen;
 	vacrel->lpdead_items += lpdead_items;
-	vacrel->live_tuples += live_tuples;
-	vacrel->recently_dead_tuples += recently_dead_tuples;
+	vacrel->live_tuples += presult.live_tuples;
+	vacrel->recently_dead_tuples += presult.recently_dead_tuples;
 
 	/* Can't truncate this page */
 	if (presult.hastup)
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 87f8649f79d..f4bf60192f8 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -198,24 +198,14 @@ typedef struct HeapPageFreeze
  */
 typedef struct PruneFreezeResult
 {
+	int			live_tuples;
+	int			recently_dead_tuples;
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
 	bool		all_visible;	/* Whether or not the page is all visible */
 	bool		hastup;			/* Does page make rel truncation unsafe */
 	TransactionId frz_conflict_horizon; /* Newest xmin on the page */
 
-	/*
-	 * Tuple visibility is only computed once for each tuple, for correctness
-	 * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
-	 * details. This is of type int8[], instead of HTSV_Result[], so we can
-	 * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
-	 * items.
-	 *
-	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
-	 * 1. Otherwise every access would need to subtract 1.
-	 */
-	int8		htsv[MaxHeapTuplesPerPage + 1];
-
 	bool		all_visible_except_removable;
 
 	/* Whether or not the page can be set all frozen in the VM */
@@ -231,20 +221,6 @@ typedef struct PruneFreezeResult
 	MultiXactId new_relminmxid;
 } PruneFreezeResult;
 
-/*
- * Pruning calculates tuple visibility once and saves the results in an array
- * of int8. See PruneFreezeResult.htsv for details. This helper function is
- * meant to guard against examining visibility status array members which have
- * not yet been computed.
- */
-static inline HTSV_Result
-htsv_get_valid_status(int status)
-{
-	Assert(status >= HEAPTUPLE_DEAD &&
-		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
-	return (HTSV_Result) status;
-}
-
 /* ----------------
  *		function prototypes for heap access method
  *
-- 
2.40.1

