From 2565b8554e321e8ca9a87f36a48f9ab7f86ab247 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 13 Aug 2024 20:01:07 +0300
Subject: [PATCH v6 10/12] Make SnapBuildWaitSnapshot work without
 xl_running_xacts.xids array

SnapBuildWaitSnapshot looped through all the XIDs in the
xl_running_xacts, waiting for them to finish. Change it to grab the
list of running XIDs from the proc array instead. This removes the
last usage of the XIDs array in the xl_running_xacts record, allowing
it to be removed in the next commit.

When SnapBuildWaitSnapshot() is called with running->nextXid as the
'cutoff' point, the new code should wait for exactly the same set of
transactions as before. But when called with initial_xmin_horizon as
the 'cutoff', this might wait for more transactions than before: those
between running->nextXid and initial_xmin_horizon. For example,
imagine that we see a running-xacts record with nextXid 100, and
initial_xmin_horizon is 200. Before, we would wait for all XIDs < 100
to complete, and then log the standby snapshot and proceed, but now we
will wait for all XIDs < 200. I believe that's a good thing, because
we won't actually be able to move to the next state in the snapshot
building until all transactions < 200 have completed. The
running-xacts snapshot that we logged after waiting up to XID 100
would not be useful to us either, if there are still XIDs between 100
and 200 running.

SnapBuildWaitSnapshot() used to do useless work when called in a
standby, because in a standby, there are no XID locks and the
XactLockTableWait() calls returned immediately, even if the XIDs were
in fact still running in the primary. But as the comment says, the
waiting isn't necessary for correctness, so that was harmless. In any
case, stop doing the futile work on a standby.
---
 src/backend/replication/logical/snapbuild.c | 50 ++++++++++++++-------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 97d278052df..252526ecf91 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -164,7 +164,7 @@ static inline bool SnapBuildXidHasCatalogChanges(SnapBuild *builder, Transaction
 
 /* xlog reading helper functions for SnapBuildProcessRunningXacts */
 static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
-static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff);
+static void SnapBuildWaitSnapshot(TransactionId cutoff);
 
 /* serialization functions */
 static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
@@ -1192,14 +1192,17 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 		NormalTransactionIdPrecedes(running->oldestRunningXid,
 									builder->initial_xmin_horizon))
 	{
+		TransactionId cutoff;
+
 		ereport(DEBUG1,
 				(errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
 								 LSN_FORMAT_ARGS(lsn)),
 				 errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
 									builder->initial_xmin_horizon, running->oldestRunningXid)));
 
-
-		SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon);
+		cutoff = builder->initial_xmin_horizon;
+		TransactionIdRetreat(cutoff);
+		SnapBuildWaitSnapshot(cutoff);
 
 		return true;
 	}
@@ -1286,7 +1289,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 				 errdetail("Waiting for transactions (approximately %d) older than %u to end.",
 						   running->xcnt, running->nextXid)));
 
-		SnapBuildWaitSnapshot(running, running->nextXid);
+		SnapBuildWaitSnapshot(running->nextXid);
 	}
 
 	/*
@@ -1310,7 +1313,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 				 errdetail("Waiting for transactions (approximately %d) older than %u to end.",
 						   running->xcnt, running->nextXid)));
 
-		SnapBuildWaitSnapshot(running, running->nextXid);
+		SnapBuildWaitSnapshot(running->nextXid);
 	}
 
 	/*
@@ -1343,8 +1346,8 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 }
 
 /* ---
- * Iterate through xids in record, wait for all older than the cutoff to
- * finish.  Then, if possible, log a new xl_running_xacts record.
+ * Wait for all transactions older than or equal to the cutoff to finish.
+ * Then, if possible, log a new xl_running_xacts record.
  *
  * This isn't required for the correctness of decoding, but to:
  * a) allow isolationtester to notice that we're currently waiting for
@@ -1354,13 +1357,31 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
  * ---
  */
 static void
-SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
+SnapBuildWaitSnapshot(TransactionId cutoff)
 {
-	int			off;
+	RunningTransactions running;
+
+	if (RecoveryInProgress())
+	{
+		/*
+		 * During recovery, we have no mechanism for waiting for an XID to
+		 * finish, and we cannot create new running-xacts records either.
+		 */
+		return;
+	}
+
+	running = GetRunningTransactionData();
+
+	/*
+	 * GetRunningTransactionData returns with XidGenLock and ProcArrayLock
+	 * held, but we don't need them.
+	 */
+	LWLockRelease(XidGenLock);
+	LWLockRelease(ProcArrayLock);
 
-	for (off = 0; off < running->xcnt; off++)
+	for (int i = 0; i < running->xcnt; i++)
 	{
-		TransactionId xid = running->xids[off];
+		TransactionId xid = running->xids[i];
 
 		/*
 		 * Upper layers should prevent that we ever need to wait on ourselves.
@@ -1370,7 +1391,7 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
 		if (TransactionIdIsCurrentTransactionId(xid))
 			elog(ERROR, "waiting for ourselves");
 
-		if (TransactionIdFollows(xid, cutoff))
+		if (TransactionIdFollowsOrEquals(xid, cutoff))
 			continue;
 
 		XactLockTableWait(xid, NULL, NULL, XLTW_None);
@@ -1382,10 +1403,7 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
 	 * wait for bgwriter or checkpointer to log one.  During recovery we can't
 	 * enforce that, so we'll have to wait.
 	 */
-	if (!RecoveryInProgress())
-	{
-		LogStandbySnapshot();
-	}
+	LogStandbySnapshot();
 }
 
 #define SnapBuildOnDiskConstantSize \
-- 
2.39.5

