From bd4f9ab115cf3f881f4d780b03c102e550238e6d Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 6 Apr 2020 21:28:55 -0700
Subject: [PATCH v7 11/11] snapshot scalability: cache snapshots using a xact
 completion counter.

---
 src/include/access/transam.h                |   8 ++
 src/include/utils/snapshot.h                |   7 ++
 src/backend/replication/logical/snapbuild.c |   1 +
 src/backend/storage/ipc/procarray.c         | 111 ++++++++++++++++----
 src/backend/utils/time/snapmgr.c            |   4 +
 5 files changed, 109 insertions(+), 22 deletions(-)

diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index 924e5fa724e..73ed8c25dff 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -211,6 +211,14 @@ typedef struct VariableCacheData
 	FullTransactionId latestCompletedFullXid;	/* newest full XID that has
 												 * committed or aborted */
 
+	/*
+	 * Number of top-level transactions that completed in some form since the
+	 * start of the server. This currently is solely used to check whether
+	 * GetSnapshotData() needs to recompute the contents of the snapshot, or
+	 * not. There are likely other users of this.  Starts at 1, so never 0.
+	 */
+	uint64 xactCompletionCount;
+
 	/*
 	 * These fields are protected by CLogTruncationLock
 	 */
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 2bc415376ac..dc37798fe9e 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -207,6 +207,13 @@ typedef struct SnapshotData
 
 	TimestampTz whenTaken;		/* timestamp when snapshot was taken */
 	XLogRecPtr	lsn;			/* position in the WAL stream when taken */
+
+	/*
+	 * The transaction completion count at the time GetSnapshotData() built
+	 * this snapshot. Allows avoiding re-computing static snapshots when no
+	 * transactions completed since the last GetSnapshotData().
+	 */
+	uint64		snapXactCompletionCount;
 } SnapshotData;
 
 #endif							/* SNAPSHOT_H */
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index e9701ea7221..9d5d68f3fa7 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -524,6 +524,7 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
 	snapshot->curcid = FirstCommandId;
 	snapshot->active_count = 0;
 	snapshot->regd_count = 0;
+	snapshot->snapXactCompletionCount = 0;
 
 	return snapshot;
 }
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 92ed8d20519..7bd847d70d9 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -365,6 +365,7 @@ CreateSharedProcArray(void)
 		procArray->lastOverflowedXid = InvalidTransactionId;
 		procArray->replication_slot_xmin = InvalidTransactionId;
 		procArray->replication_slot_catalog_xmin = InvalidTransactionId;
+		ShmemVariableCache->xactCompletionCount = 1;
 	}
 
 	allProcs = ProcGlobal->allProcs;
@@ -499,6 +500,9 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
 		/* Advance global latestCompletedXid while holding the lock */
 		MaintainLatestCompletedXid(latestXid);
 
+		/* Same with xactCompletionCount */
+		ShmemVariableCache->xactCompletionCount++;
+
 		ProcGlobal->xids[proc->pgxactoff] = 0;
 		ProcGlobal->subxidStates[proc->pgxactoff].overflowed = false;
 		ProcGlobal->subxidStates[proc->pgxactoff].count = 0;
@@ -631,6 +635,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
 {
 	size_t		pgxactoff = proc->pgxactoff;
 
+	Assert(LWLockHeldByMe(ProcArrayLock));
 	Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
 	Assert(ProcGlobal->xids[pgxactoff] == proc->xidCopy);
 
@@ -662,6 +667,9 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
 
 	/* Also advance global latestCompletedXid while holding the lock */
 	MaintainLatestCompletedXid(latestXid);
+
+	/* Same with xactCompletionCount */
+	ShmemVariableCache->xactCompletionCount++;
 }
 
 /*
@@ -1826,6 +1834,77 @@ GetMaxSnapshotSubxidCount(void)
 	return TOTAL_MAX_CACHED_SUBXIDS;
 }
 
+static void
+GetSnapshotDataFillTooOld(Snapshot snapshot)
+{
+	if (old_snapshot_threshold < 0)
+	{
+		/*
+		 * "snapshot too old" feature disabled (negative threshold): fill the
+		 * lsn and whenTaken fields with dummy values needing no locking.
+		 */
+		snapshot->lsn = InvalidXLogRecPtr;
+		snapshot->whenTaken = 0;
+	}
+	else
+	{
+		/*
+		 * Feature enabled: capture the current time and WAL stream location in
+		 * case this snapshot gets old enough to need the "old snapshot" logic,
+		 * then hand that time and the snapshot's xmin to the time mapping.
+		 */
+		snapshot->lsn = GetXLogInsertRecPtr();
+		snapshot->whenTaken = GetSnapshotCurrentTimestamp();
+		MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
+	}
+}
+
+/*
+ * Helper function for GetSnapshotData() that checks whether the bulk of the
+ * visibility information in the snapshot is still valid, i.e. whether the
+ * xact completion count is unchanged since the snapshot was built. If so, it
+ * updates the fields that need to change and returns true; otherwise false.
+ *
+ * Caller must hold ProcArrayLock. This very likely can be evolved to not need
+ * the lock (at very least if we already hold a snapshot), but not today.
+ */
+static bool
+GetSnapshotDataReuse(Snapshot snapshot)
+{
+	uint64 curXactCompletionCount;
+
+	Assert(LWLockHeldByMe(ProcArrayLock));
+
+	if (unlikely(snapshot->snapXactCompletionCount == 0))
+		return false;			/* 0: no completion count was recorded */
+
+	curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
+	if (curXactCompletionCount != snapshot->snapXactCompletionCount)
+		return false;			/* counter moved since snapshot was built */
+
+	/*
+	 * It is safe to re-enter the snapshot's xmin. This can't cause xmin to go
+	 * backwards, as ProcArrayLock prevents concurrent commits of transactions
+	 * with xids, and the completion count check ensures we'd have gotten the
+	 * same result computing the snapshot the hard way (as only running xids
+	 * matter).
+	 */
+	if (!TransactionIdIsValid(MyProc->xmin))
+		MyProc->xmin = TransactionXmin = snapshot->xmin;
+
+	RecentXmin = snapshot->xmin;
+	Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
+
+	snapshot->curcid = GetCurrentCommandId(false);
+	snapshot->active_count = 0;
+	snapshot->regd_count = 0;
+	snapshot->copied = false;
+
+	GetSnapshotDataFillTooOld(snapshot);	/* NOTE(review): lock still held here */
+
+	return true;
+}
+
 /*
  * GetSnapshotData -- returns information about running transactions.
  *
@@ -1873,7 +1952,7 @@ GetSnapshotData(Snapshot snapshot)
 	TransactionId oldestxid;
 	int			mypgxactoff;
 	TransactionId myxid;
-
+	uint64		curXactCompletionCount;
 	TransactionId replication_slot_xmin = InvalidTransactionId;
 	TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
 
@@ -1917,12 +1996,19 @@ GetSnapshotData(Snapshot snapshot)
 	 */
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
+	if (GetSnapshotDataReuse(snapshot))
+	{
+		LWLockRelease(ProcArrayLock);
+		return snapshot;
+	}
+
 	latest_completed = ShmemVariableCache->latestCompletedFullXid;
 	mypgxactoff = MyProc->pgxactoff;
 	myxid = other_xids[mypgxactoff];
 	Assert(myxid == MyProc->xidCopy);
 
 	oldestxid = ShmemVariableCache->oldestXid;
+	curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
 
 	/* xmax is always latestCompletedXid + 1 */
 	xmax = XidFromFullTransactionId(latest_completed);
@@ -2179,7 +2265,7 @@ GetSnapshotData(Snapshot snapshot)
 	snapshot->xcnt = count;
 	snapshot->subxcnt = subcount;
 	snapshot->suboverflowed = suboverflowed;
-
+	snapshot->snapXactCompletionCount = curXactCompletionCount;
 	snapshot->curcid = GetCurrentCommandId(false);
 
 	/*
@@ -2190,26 +2276,7 @@ GetSnapshotData(Snapshot snapshot)
 	snapshot->regd_count = 0;
 	snapshot->copied = false;
 
-	if (old_snapshot_threshold < 0)
-	{
-		/*
-		 * If not using "snapshot too old" feature, fill related fields with
-		 * dummy values that don't require any locking.
-		 */
-		snapshot->lsn = InvalidXLogRecPtr;
-		snapshot->whenTaken = 0;
-	}
-	else
-	{
-		/*
-		 * Capture the current time and WAL stream location in case this
-		 * snapshot becomes old enough to need to fall back on the special
-		 * "old snapshot" logic.
-		 */
-		snapshot->lsn = GetXLogInsertRecPtr();
-		snapshot->whenTaken = GetSnapshotCurrentTimestamp();
-		MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
-	}
+	GetSnapshotDataFillTooOld(snapshot);
 
 	return snapshot;
 }
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 01f1c133014..62e5d747d64 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -597,6 +597,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
 	CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
 	/* NB: curcid should NOT be copied, it's a local matter */
 
+	CurrentSnapshot->snapXactCompletionCount = 0;
+
 	/*
 	 * Now we have to fix what GetSnapshotData did with MyProc->xmin and
 	 * TransactionXmin.  There is a race condition: to make sure we are not
@@ -672,6 +674,7 @@ CopySnapshot(Snapshot snapshot)
 	newsnap->regd_count = 0;
 	newsnap->active_count = 0;
 	newsnap->copied = true;
+	newsnap->snapXactCompletionCount = 0;
 
 	/* setup XID array */
 	if (snapshot->xcnt > 0)
@@ -2224,6 +2227,7 @@ RestoreSnapshot(char *start_address)
 	snapshot->curcid = serialized_snapshot.curcid;
 	snapshot->whenTaken = serialized_snapshot.whenTaken;
 	snapshot->lsn = serialized_snapshot.lsn;
+	snapshot->snapXactCompletionCount = 0;
 
 	/* Copy XIDs, if present. */
 	if (serialized_snapshot.xcnt > 0)
-- 
2.25.0.114.g5b0ca878e0

