From f2a0cebcdd202fbe39b5f0aa8c2034df1b6a5ccd Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 23 Jul 2020 14:19:48 -0700
Subject: [PATCH v12 1/7] Track latest completed xid as a FullTransactionId.

The reason for doing so is that a subsequent commit will need that to
avoid wraparound issues. As the subsequent change is large this was
split out for easier review.

The reason this is not a perfect straight-forward change is that we do
not want track 64bit xids in the procarray or the WAL. Therefore we
need to advance lastestCompletedXid in relation to 32 bit xids. The
code for that is now centralized in MaintainLatestCompletedXid*.

Author: Andres Freund
Reviewed-By: Robert Haas, Thomas Munro, David Rowley
Discussion: https://postgr.es/m/20200301083601.ews6hz5dduc3w2se@alap3.anarazel.de
---
 src/include/access/transam.h        |  37 ++++++-
 src/backend/access/transam/README   |  16 +--
 src/backend/access/transam/varsup.c |  54 +++++++++-
 src/backend/access/transam/xlog.c   |   7 +-
 src/backend/storage/ipc/procarray.c | 150 +++++++++++++++++++++-------
 5 files changed, 212 insertions(+), 52 deletions(-)

diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index a91a0c7487d..9502217932e 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -54,6 +54,8 @@
 #define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value)
 #define FullTransactionIdIsValid(x)		TransactionIdIsValid(XidFromFullTransactionId(x))
 #define InvalidFullTransactionId		FullTransactionIdFromEpochAndXid(0, InvalidTransactionId)
+#define FirstNormalFullTransactionId	FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId)
+#define FullTransactionIdIsNormal(x)	FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId)
 
 /*
  * A 64 bit value that contains an epoch and a TransactionId.  This is
@@ -102,6 +104,31 @@ FullTransactionIdAdvance(FullTransactionId *dest)
 		dest->value++;
 }
 
+/*
+ * Retreat a FullTransactionId variable, stepping over xids that would appear
+ * to be special only when viewed as 32bit XIDs.
+ */
+static inline void
+FullTransactionIdRetreat(FullTransactionId *dest)
+{
+	dest->value--;
+
+	/*
+	 * In contrast to 32bit XIDs don't step over the "actual" special xids.
+	 * For 64bit xids these can't be reached as part of a wraparound as they
+	 * can in the 32bit case.
+	 */
+	if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId))
+		return;
+
+	/*
+	 * But we do need to step over XIDs that'd appear special only for 32bit
+	 * XIDs.
+	 */
+	while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId)
+		dest->value--;
+}
+
 /* back up a transaction ID variable, handling wraparound correctly */
 #define TransactionIdRetreat(dest)	\
 	do { \
@@ -193,8 +220,8 @@ typedef struct VariableCacheData
 	/*
 	 * These fields are protected by ProcArrayLock.
 	 */
-	TransactionId latestCompletedXid;	/* newest XID that has committed or
-										 * aborted */
+	FullTransactionId latestCompletedFullXid;	/* newest full XID that has
+												 * committed or aborted */
 
 	/*
 	 * These fields are protected by XactTruncationLock
@@ -244,6 +271,12 @@ extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
 extern bool ForceTransactionIdLimitUpdate(void);
 extern Oid	GetNewObjectId(void);
 
+#ifdef USE_ASSERT_CHECKING
+extern void AssertTransactionIdInAllowableRange(TransactionId xid);
+#else
+#define AssertTransactionIdInAllowableRange(xid) ((void)true)
+#endif
+
 /*
  * Some frontend programs include this header.  For compilers that emit static
  * inline functions even when they're unused, that leads to unsatisfied
diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README
index eb9aac5fd39..c06e52f9cd0 100644
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@@ -257,31 +257,31 @@ simultaneously, we have one backend take ProcArrayLock and clear the XIDs
 of multiple processes at once.)
 
 ProcArrayEndTransaction also holds the lock while advancing the shared
-latestCompletedXid variable.  This allows GetSnapshotData to use
-latestCompletedXid + 1 as xmax for its snapshot: there can be no
+latestCompletedFullXid variable.  This allows GetSnapshotData to use
+latestCompletedFullXid + 1 as xmax for its snapshot: there can be no
 transaction >= this xid value that the snapshot needs to consider as
 completed.
 
 In short, then, the rule is that no transaction may exit the set of
-currently-running transactions between the time we fetch latestCompletedXid
+currently-running transactions between the time we fetch latestCompletedFullXid
 and the time we finish building our snapshot.  However, this restriction
 only applies to transactions that have an XID --- read-only transactions
 can end without acquiring ProcArrayLock, since they don't affect anyone
-else's snapshot nor latestCompletedXid.
+else's snapshot nor latestCompletedFullXid.
 
 Transaction start, per se, doesn't have any interlocking with these
 considerations, since we no longer assign an XID immediately at transaction
 start.  But when we do decide to allocate an XID, GetNewTransactionId must
 store the new XID into the shared ProcArray before releasing XidGenLock.
-This ensures that all top-level XIDs <= latestCompletedXid are either
+This ensures that all top-level XIDs <= latestCompletedFullXid are either
 present in the ProcArray, or not running anymore.  (This guarantee doesn't
 apply to subtransaction XIDs, because of the possibility that there's not
 room for them in the subxid array; instead we guarantee that they are
 present or the overflow flag is set.)  If a backend released XidGenLock
 before storing its XID into MyPgXact, then it would be possible for another
-backend to allocate and commit a later XID, causing latestCompletedXid to
+backend to allocate and commit a later XID, causing latestCompletedFullXid to
 pass the first backend's XID, before that value became visible in the
-ProcArray.  That would break GetOldestXmin, as discussed below.
+ProcArray.  That would break ComputeXidHorizons, as discussed below.
 
 We allow GetNewTransactionId to store the XID into MyPgXact->xid (or the
 subxid array) without taking ProcArrayLock.  This was once necessary to
@@ -311,7 +311,7 @@ currently-active XIDs: no xact, in particular not the oldest, can exit
 while we hold shared ProcArrayLock.  So GetOldestXmin's view of the minimum
 active XID will be the same as that of any concurrent GetSnapshotData, and
 so it can't produce an overestimate.  If there is no active transaction at
-all, GetOldestXmin returns latestCompletedXid + 1, which is a lower bound
+all, GetOldestXmin returns latestCompletedFullXid + 1, which is a lower bound
 for the xmin that might be computed by concurrent or later GetSnapshotData
 calls.  (We know that no XID less than this could be about to appear in
 the ProcArray, because of the XidGenLock interlock discussed above.)
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index e14b53bf9e3..66eb74aa9f8 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -187,8 +187,8 @@ GetNewTransactionId(bool isSubXact)
 	/*
 	 * We must store the new XID into the shared ProcArray before releasing
 	 * XidGenLock.  This ensures that every active XID older than
-	 * latestCompletedXid is present in the ProcArray, which is essential for
-	 * correct OldestXmin tracking; see src/backend/access/transam/README.
+	 * latestCompletedFullXid is present in the ProcArray, which is essential
+	 * for correct OldestXmin tracking; see src/backend/access/transam/README.
 	 *
 	 * Note that readers of PGXACT xid fields should be careful to fetch the
 	 * value only once, rather than assume they can read a value multiple
@@ -566,3 +566,53 @@ GetNewObjectId(void)
 
 	return result;
 }
+
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * Assert that xid is between [oldestXid, nextFullXid], which is the range we
+ * expect XIDs coming from tables etc to be in.
+ *
+ * As ShmemVariableCache->oldestXid could change just after this call without
+ * further precautions, and as a wrapped-around xid could again fall within
+ * the valid range, this assertion can only detect if something is definitely
+ * wrong, but not establish correctness.
+ *
+ * This intentionally does not expose a return value, to avoid code being
+ * introduced that depends on the return value.
+ */
+void
+AssertTransactionIdInAllowableRange(TransactionId xid)
+{
+	TransactionId oldest_xid;
+	TransactionId next_xid;
+
+	Assert(TransactionIdIsValid(xid));
+
+	/* we may see bootstrap / frozen */
+	if (!TransactionIdIsNormal(xid))
+		return;
+
+	/*
+	 * We can't acquire XidGenLock, as this may be called with XidGenLock
+	 * already held (or with other locks that don't allow XidGenLock to be
+	 * nested). That's ok for our purposes though, since we already rely on
+	 * 32bit reads to be atomic. While nextFullXid is 64 bit, we only look at
+	 * the lower 32bit, so a skewed read doesn't hurt.
+	 *
+	 * There's no increased danger of falling outside [oldest, next] by
+	 * accessing them without a lock. xid needs to have been created with
+	 * GetNewTransactionId() in the originating session, and the locks there
+	 * pair with the memory barrier below.  We do however accept xid to be <=
+	 * to next_xid, instead of just <, as xid could be from the procarray,
+	 * before we see the updated nextFullXid value.
+	 */
+	pg_memory_barrier();
+	oldest_xid = ShmemVariableCache->oldestXid;
+	next_xid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
+
+	Assert(TransactionIdFollowsOrEquals(xid, oldest_xid) ||
+		   TransactionIdPrecedesOrEquals(xid, next_xid));
+}
+#endif
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 184c6672f3b..e1a043763cf 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7866,10 +7866,11 @@ StartupXLOG(void)
 	XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
 	XLogCtl->lastSegSwitchLSN = EndOfLog;
 
-	/* also initialize latestCompletedXid, to nextXid - 1 */
+	/* also initialize latestCompletedFullXid, to nextFullXid - 1 */
 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-	ShmemVariableCache->latestCompletedXid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
-	TransactionIdRetreat(ShmemVariableCache->latestCompletedXid);
+	ShmemVariableCache->latestCompletedFullXid =
+		ShmemVariableCache->nextFullXid;
+	FullTransactionIdRetreat(&ShmemVariableCache->latestCompletedFullXid);
 	LWLockRelease(ProcArrayLock);
 
 	/*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index b4485335644..82798760752 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -175,6 +175,10 @@ static void KnownAssignedXidsReset(void);
 static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
 												   PGXACT *pgxact, TransactionId latestXid);
 static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
+static void MaintainLatestCompletedXid(TransactionId latestXid);
+static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
+
+static inline FullTransactionId FullXidViaRelative(FullTransactionId rel, TransactionId xid);
 
 /*
  * Report shared-memory space needed by CreateSharedProcArray.
@@ -349,9 +353,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
 		Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
 
 		/* Advance global latestCompletedXid while holding the lock */
-		if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-								  latestXid))
-			ShmemVariableCache->latestCompletedXid = latestXid;
+		MaintainLatestCompletedXid(latestXid);
 	}
 	else
 	{
@@ -464,9 +466,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
 	pgxact->overflowed = false;
 
 	/* Also advance global latestCompletedXid while holding the lock */
-	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-							  latestXid))
-		ShmemVariableCache->latestCompletedXid = latestXid;
+	MaintainLatestCompletedXid(latestXid);
 }
 
 /*
@@ -621,6 +621,59 @@ ProcArrayClearTransaction(PGPROC *proc)
 	pgxact->overflowed = false;
 }
 
+/*
+ * Update ShmemVariableCache->latestCompletedFullXid to point to latestXid if
+ * currently older.
+ */
+static void
+MaintainLatestCompletedXid(TransactionId latestXid)
+{
+	FullTransactionId cur_latest = ShmemVariableCache->latestCompletedFullXid;
+
+	Assert(FullTransactionIdIsValid(cur_latest));
+	Assert(!RecoveryInProgress());
+	Assert(LWLockHeldByMe(ProcArrayLock));
+
+	if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
+	{
+		ShmemVariableCache->latestCompletedFullXid =
+			FullXidViaRelative(cur_latest, latestXid);
+	}
+
+	Assert(IsBootstrapProcessingMode() ||
+		   FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedFullXid));
+}
+
+/*
+ * Same as MaintainLatestCompletedXid, except for use during WAL replay.
+ */
+static void
+MaintainLatestCompletedXidRecovery(TransactionId latestXid)
+{
+	FullTransactionId cur_latest = ShmemVariableCache->latestCompletedFullXid;
+	FullTransactionId rel;
+
+	Assert(AmStartupProcess() || !IsUnderPostmaster);
+	Assert(LWLockHeldByMe(ProcArrayLock));
+
+	/*
+	 * Need a FullTransactionId to compare latestXid with. Can't rely on
+	 * latestCompletedFullXid to be initialized in recovery. But in recovery
+	 * it's safe to access nextFullXid without a lock for the startup process.
+	 */
+	rel = ShmemVariableCache->nextFullXid;
+	Assert(FullTransactionIdIsValid(ShmemVariableCache->nextFullXid));
+
+	if (!FullTransactionIdIsValid(cur_latest) ||
+		TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
+	{
+		ShmemVariableCache->latestCompletedFullXid =
+			FullXidViaRelative(rel, latestXid);
+	}
+
+	Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedFullXid));
+}
+
 /*
  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
  *
@@ -841,7 +894,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
 	 * Now we've got the running xids we need to set the global values that
 	 * are used to track snapshots as they evolve further.
 	 *
-	 * - latestCompletedXid which will be the xmax for snapshots
+	 * - latestCompletedFullXid which will be the xmax for snapshots
 	 * - lastOverflowedXid which shows whether snapshots overflow
 	 * - nextXid
 	 *
@@ -867,14 +920,11 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
 
 	/*
 	 * If a transaction wrote a commit record in the gap between taking and
-	 * logging the snapshot then latestCompletedXid may already be higher than
-	 * the value from the snapshot, so check before we use the incoming value.
+	 * logging the snapshot then latestCompletedFullXid may already be higher
+	 * than the value from the snapshot, so check before we use the incoming
+	 * value. It also might not yet be set at all.
 	 */
-	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-							  running->latestCompletedXid))
-		ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
-
-	Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
+	MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
 
 	LWLockRelease(ProcArrayLock);
 
@@ -1048,10 +1098,11 @@ TransactionIdIsInProgress(TransactionId xid)
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
 	/*
-	 * Now that we have the lock, we can check latestCompletedXid; if the
+	 * Now that we have the lock, we can check latestCompletedFullXid; if the
 	 * target Xid is after that, it's surely still running.
 	 */
-	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
+	if (TransactionIdPrecedes(XidFromFullTransactionId(ShmemVariableCache->latestCompletedFullXid),
+							  xid))
 	{
 		LWLockRelease(ProcArrayLock);
 		xc_by_latest_xid_inc();
@@ -1283,7 +1334,7 @@ TransactionIdIsActive(TransactionId xid)
  * anything older is definitely not considered as running by anyone anymore,
  * but the exact value calculated depends on a number of things. For example,
  * if rel = NULL and there are no transactions running in the current
- * database, GetOldestXmin() returns latestCompletedXid. If a transaction
+ * database, GetOldestXmin() returns latestCompletedFullXid. If a transaction
  * begins after that, its xmin will include in-progress transactions in other
  * databases that started earlier, so another call will return a lower value.
  * Nonetheless it is safe to vacuum a table in the current database with the
@@ -1325,14 +1376,14 @@ GetOldestXmin(Relation rel, int flags)
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
 	/*
-	 * We initialize the MIN() calculation with latestCompletedXid + 1. This
-	 * is a lower bound for the XIDs that might appear in the ProcArray later,
-	 * and so protects us against overestimating the result due to future
-	 * additions.
+	 * We initialize the MIN() calculation with latestCompletedFullXid + 1.
+	 * This is a lower bound for the XIDs that might appear in the ProcArray
+	 * later, and so protects us against overestimating the result due to
+	 * future additions.
 	 */
-	result = ShmemVariableCache->latestCompletedXid;
-	Assert(TransactionIdIsNormal(result));
+	result = XidFromFullTransactionId(ShmemVariableCache->latestCompletedFullXid);
 	TransactionIdAdvance(result);
+	Assert(TransactionIdIsNormal(result));
 
 	for (index = 0; index < arrayP->numProcs; index++)
 	{
@@ -1511,6 +1562,7 @@ GetSnapshotData(Snapshot snapshot)
 	int			count = 0;
 	int			subcount = 0;
 	bool		suboverflowed = false;
+	FullTransactionId latest_completed;
 	TransactionId replication_slot_xmin = InvalidTransactionId;
 	TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
 
@@ -1554,10 +1606,11 @@ GetSnapshotData(Snapshot snapshot)
 	 */
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
-	/* xmax is always latestCompletedXid + 1 */
-	xmax = ShmemVariableCache->latestCompletedXid;
-	Assert(TransactionIdIsNormal(xmax));
+	latest_completed = ShmemVariableCache->latestCompletedFullXid;
+	/* xmax is always latestCompletedFullXid + 1 */
+	xmax = XidFromFullTransactionId(latest_completed);
 	TransactionIdAdvance(xmax);
+	Assert(TransactionIdIsNormal(xmax));
 
 	/* initialize xmin calculation with xmax */
 	globalxmin = xmin = xmax;
@@ -1984,9 +2037,10 @@ GetRunningTransactionData(void)
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 	LWLockAcquire(XidGenLock, LW_SHARED);
 
-	latestCompletedXid = ShmemVariableCache->latestCompletedXid;
-
-	oldestRunningXid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
+	latestCompletedXid =
+		XidFromFullTransactionId(ShmemVariableCache->latestCompletedFullXid);
+	oldestRunningXid =
+		XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
 
 	/*
 	 * Spin over procArray collecting all xids
@@ -3206,10 +3260,8 @@ XidCacheRemoveRunningXids(TransactionId xid,
 	if (j < 0 && !MyPgXact->overflowed)
 		elog(WARNING, "did not find subXID %u in MyProc", xid);
 
-	/* Also advance global latestCompletedXid while holding the lock */
-	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-							  latestXid))
-		ShmemVariableCache->latestCompletedXid = latestXid;
+	/* Also advance global latestCompletedFullXid while holding the lock */
+	MaintainLatestCompletedXid(latestXid);
 
 	LWLockRelease(ProcArrayLock);
 }
@@ -3236,6 +3288,32 @@ DisplayXidCache(void)
 }
 #endif							/* XIDCACHE_DEBUG */
 
+/*
+ * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
+ * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
+ *
+ * Be very careful about when to use this function. It can only safely be used
+ * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
+ * rel. That e.g. can be guaranteed if the the caller assures a snapshot is
+ * held by the backend and xid is from a table (where vacuum/freezing ensures
+ * the xid has to be within that range), or if xid is from the procarray and
+ * prevents xid wraparound that way.
+ */
+static inline FullTransactionId
+FullXidViaRelative(FullTransactionId rel, TransactionId xid)
+{
+	TransactionId rel_xid = XidFromFullTransactionId(rel);
+
+	Assert(TransactionIdIsValid(xid));
+	Assert(TransactionIdIsValid(rel_xid));
+
+	/* not guaranteed to find issues, but likely to catch mistakes */
+	AssertTransactionIdInAllowableRange(xid);
+
+	return FullTransactionIdFromU64(U64FromFullTransactionId(rel)
+									+ (int32) (xid - rel_xid));
+}
+
 
 /* ----------------------------------------------
  *		KnownAssignedTransactionIds sub-module
@@ -3387,10 +3465,8 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
 
 	KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
 
-	/* As in ProcArrayEndTransaction, advance latestCompletedXid */
-	if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
-							  max_xid))
-		ShmemVariableCache->latestCompletedXid = max_xid;
+	/* As in ProcArrayEndTransaction, advance latestCompletedFullXid */
+	MaintainLatestCompletedXidRecovery(max_xid);
 
 	LWLockRelease(ProcArrayLock);
 }
-- 
2.25.0.114.g5b0ca878e0

