From 6b66d743b5113fe504260b6a230ae23d002c4d66 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 11 Dec 2024 23:59:49 +0200
Subject: [PATCH 4/5] WIP: Add checks that no snapshots are "leaked"

GetTransactionSnapshot() and friends currently return a pointer to a
statically allocated SnapshotData. That makes it OK to call
GetTransactionSnapshot() and throw away the result, without leaking
the snapshot. The comment on GetTransactionSnapshot() said that you
"the returned value *may* point to static storage", but we were
actually relying on that in a few places, to not leak.

This adds an assertion that every call to GetTransactionSnapshot() is
paired with a call to PushActiveSnapshot() or RegisterSnapshot(). With
this, GetTransactionSnapshot() coult return a dynamically allocated
Snapshot without leaking.
---
 contrib/amcheck/verify_heapam.c        | 16 +++++++----
 src/backend/access/transam/parallel.c  |  4 ++-
 src/backend/executor/execReplication.c |  6 ++--
 src/backend/executor/spi.c             |  3 +-
 src/backend/storage/lmgr/predicate.c   |  7 +++--
 src/backend/tcop/pquery.c              | 24 ++++++++--------
 src/backend/utils/adt/ri_triggers.c    |  9 ++++--
 src/backend/utils/time/snapmgr.c       | 39 +++++++++++++++++++++++++-
 8 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index e16557aca36..b0412e3064e 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -231,6 +231,7 @@ verify_heapam(PG_FUNCTION_ARGS)
 	BlockNumber last_block;
 	BlockNumber nblocks;
 	const char *skip;
+	Snapshot	snapshot;
 
 	/* Check supplied arguments */
 	if (PG_ARGISNULL(0))
@@ -272,12 +273,6 @@ verify_heapam(PG_FUNCTION_ARGS)
 	ctx.cached_xid = InvalidTransactionId;
 	ctx.toasted_attributes = NIL;
 
-	/*
-	 * Any xmin newer than the xmin of our snapshot can't become all-visible
-	 * while we're running.
-	 */
-	ctx.safe_xmin = GetTransactionSnapshot()->xmin;
-
 	/*
 	 * If we report corruption when not examining some individual attribute,
 	 * we need attnum to be reported as NULL.  Set that up before any
@@ -338,6 +333,13 @@ verify_heapam(PG_FUNCTION_ARGS)
 		PG_RETURN_NULL();
 	}
 
+	/*
+	 * Any xmin newer than the xmin of our snapshot can't become all-visible
+	 * while we're running.
+	 */
+	snapshot = RegisterSnapshot(GetTransactionSnapshot());
+	ctx.safe_xmin = snapshot->xmin;
+
 	ctx.bstrategy = GetAccessStrategy(BAS_BULKREAD);
 	ctx.buffer = InvalidBuffer;
 	ctx.page = NULL;
@@ -802,6 +804,8 @@ verify_heapam(PG_FUNCTION_ARGS)
 	if (vmbuffer != InvalidBuffer)
 		ReleaseBuffer(vmbuffer);
 
+	UnregisterSnapshot(snapshot);
+
 	/* Close the associated toast table and indexes, if any. */
 	if (ctx.toast_indexes)
 		toast_close_indexes(ctx.toast_indexes, ctx.num_toast_indexes,
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 0a1e089ec1d..7d6d9636e7e 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -222,7 +222,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
 	int			i;
 	FixedParallelState *fps;
 	dsm_handle	session_dsm_handle = DSM_HANDLE_INVALID;
-	Snapshot	transaction_snapshot = GetTransactionSnapshot();
+	Snapshot	transaction_snapshot = RegisterSnapshot(GetTransactionSnapshot());
 	Snapshot	active_snapshot = GetActiveSnapshot();
 
 	/* We might be running in a very short-lived memory context. */
@@ -494,6 +494,8 @@ InitializeParallelDSM(ParallelContext *pcxt)
 
 	/* Restore previous memory context. */
 	MemoryContextSwitchTo(oldcontext);
+
+	UnregisterSnapshot(transaction_snapshot);
 }
 
 /*
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 68deea50f66..5a1efe9c3a7 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -285,7 +285,7 @@ retry:
 
 		PushActiveSnapshot(GetLatestSnapshot());
 
-		res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+		res = table_tuple_lock(rel, &(outslot->tts_tid), GetActiveSnapshot(),
 							   outslot,
 							   GetCurrentCommandId(false),
 							   lockmode,
@@ -443,7 +443,7 @@ retry:
 
 		PushActiveSnapshot(GetLatestSnapshot());
 
-		res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+		res = table_tuple_lock(rel, &(outslot->tts_tid), GetActiveSnapshot(),
 							   outslot,
 							   GetCurrentCommandId(false),
 							   lockmode,
@@ -500,7 +500,7 @@ retry:
 
 	PushActiveSnapshot(GetLatestSnapshot());
 
-	res = table_tuple_lock(rel, &conflictTid, GetLatestSnapshot(),
+	res = table_tuple_lock(rel, &conflictTid, GetActiveSnapshot(),
 						   *conflictslot,
 						   GetCurrentCommandId(false),
 						   LockTupleShare,
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index c1d8fd08c6c..0cc83de5870 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -1752,7 +1752,8 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
 	else
 	{
 		CommandCounterIncrement();
-		snapshot = GetTransactionSnapshot();
+		/* let PortalStart call GetTransactionSnapshot() */
+		snapshot = InvalidSnapshot;
 	}
 
 	/*
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 2030322f957..a365a4359d2 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -3950,6 +3950,8 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
 	LWLockRelease(SerializableXactHashLock);
 }
 
+extern List *dangling_snapshots;
+
 /*
  * Tests whether the given top level transaction is concurrent with
  * (overlaps) our current transaction.
@@ -3967,6 +3969,7 @@ XidIsConcurrent(TransactionId xid)
 	Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));
 
 	snap = GetTransactionSnapshot();
+	dangling_snapshots = list_delete_ptr(dangling_snapshots, snap);
 
 	if (TransactionIdPrecedes(xid, snap->xmin))
 		return false;
@@ -4214,7 +4217,7 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 		}
 		else if (!SxactIsDoomed(sxact)
 				 && (!SxactIsCommitted(sxact)
-					 || TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
+					 || TransactionIdPrecedes(TransactionXmin,
 											  sxact->finishedBefore))
 				 && !RWConflictExists(sxact, MySerializableXact))
 		{
@@ -4227,7 +4230,7 @@ CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 			 */
 			if (!SxactIsDoomed(sxact)
 				&& (!SxactIsCommitted(sxact)
-					|| TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
+					|| TransactionIdPrecedes(TransactionXmin,
 											 sxact->finishedBefore))
 				&& !RWConflictExists(sxact, MySerializableXact))
 			{
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 89d704df8d1..4ce8417f63b 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -1242,18 +1242,20 @@ PortalRunMulti(Portal portal,
 				{
 					snapshot = RegisterSnapshot(snapshot);
 					portal->holdSnapshot = snapshot;
-				}
 
-				/*
-				 * We can't have the holdSnapshot also be the active one,
-				 * because UpdateActiveSnapshotCommandId would complain.  So
-				 * force an extra snapshot copy.  Plain PushActiveSnapshot
-				 * would have copied the transaction snapshot anyway, so this
-				 * only adds a copy step when setHoldSnapshot is true.  (It's
-				 * okay for the command ID of the active snapshot to diverge
-				 * from what holdSnapshot has.)
-				 */
-				PushCopiedSnapshot(snapshot);
+					/* XXX
+					 * We can't have the holdSnapshot also be the active one,
+					 * because UpdateActiveSnapshotCommandId would complain.  So
+					 * force an extra snapshot copy.  Plain PushActiveSnapshot
+					 * would have copied the transaction snapshot anyway, so this
+					 * only adds a copy step when setHoldSnapshot is true.  (It's
+					 * okay for the command ID of the active snapshot to diverge
+					 * from what holdSnapshot has.)
+					 */
+					PushCopiedSnapshot(snapshot);
+				}
+				else
+					PushActiveSnapshot(snapshot);
 
 				/*
 				 * As for PORTAL_ONE_SELECT portals, it does not seem
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 3185f48afa6..bb6fa20ee03 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -2465,8 +2465,8 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
 	if (IsolationUsesXactSnapshot() && detectNewRows)
 	{
 		CommandCounterIncrement();	/* be sure all my own work is visible */
-		test_snapshot = GetLatestSnapshot();
-		crosscheck_snapshot = GetTransactionSnapshot();
+		test_snapshot = RegisterSnapshot(GetLatestSnapshot());
+		crosscheck_snapshot = RegisterSnapshot(GetTransactionSnapshot());
 	}
 	else
 	{
@@ -2495,6 +2495,11 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
 									  test_snapshot, crosscheck_snapshot,
 									  false, false, limit);
 
+	if (test_snapshot != NULL)
+		UnregisterSnapshot(test_snapshot);
+	if (crosscheck_snapshot != NULL)
+		UnregisterSnapshot(crosscheck_snapshot);
+
 	/* Restore UID and security context */
 	SetUserIdAndSecContext(save_userid, save_sec_context);
 
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 05f16666192..f4831ed989c 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -254,6 +254,20 @@ typedef struct SerializedSnapshotData
 	CommandId	curcid;
 } SerializedSnapshotData;
 
+List *dangling_snapshots = NIL;
+
+static inline Snapshot
+add_dangling(Snapshot snapshot)
+{
+	MemoryContext save_cxt = CurrentMemoryContext;
+
+	MemoryContextSwitchTo(TopMemoryContext);
+	dangling_snapshots = lappend(dangling_snapshots, snapshot);
+	MemoryContextSwitchTo(save_cxt);
+	return snapshot;
+}
+
+
 /*
  * GetTransactionSnapshot
  *		Get the appropriate snapshot for a new query in a transaction.
@@ -275,6 +289,7 @@ GetTransactionSnapshot(void)
 	if (HistoricSnapshotActive())
 	{
 		Assert(!FirstSnapshotSet);
+		add_dangling(HistoricSnapshot);
 		return HistoricSnapshot;
 	}
 
@@ -319,17 +334,22 @@ GetTransactionSnapshot(void)
 			CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
 
 		FirstSnapshotSet = true;
+		add_dangling(CurrentSnapshot);
 		return CurrentSnapshot;
 	}
 
 	if (IsolationUsesXactSnapshot())
+	{
+		add_dangling(CurrentSnapshot);
 		return CurrentSnapshot;
+	}
 
 	/* Don't allow catalog snapshot to be older than xact snapshot. */
 	InvalidateCatalogSnapshot();
 
 	CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
 
+	add_dangling(CurrentSnapshot);
 	return CurrentSnapshot;
 }
 
@@ -357,10 +377,12 @@ GetLatestSnapshot(void)
 
 	/* If first call in transaction, go ahead and set the xact snapshot */
 	if (!FirstSnapshotSet)
+	{
 		return GetTransactionSnapshot();
+	}
 
 	SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
-
+	add_dangling(SecondarySnapshot);
 	return SecondarySnapshot;
 }
 
@@ -380,7 +402,10 @@ GetCatalogSnapshot(Oid relid)
 	 * finishing decoding.
 	 */
 	if (HistoricSnapshotActive())
+	{
+		add_dangling(HistoricSnapshot);
 		return HistoricSnapshot;
+	}
 
 	return GetNonHistoricCatalogSnapshot(relid);
 }
@@ -426,6 +451,7 @@ GetNonHistoricCatalogSnapshot(Oid relid)
 		pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
 	}
 
+	add_dangling(CatalogSnapshot);
 	return CatalogSnapshot;
 }
 
@@ -684,6 +710,8 @@ PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
 {
 	ActiveSnapshotElt *newactive;
 
+	dangling_snapshots = list_delete_ptr(dangling_snapshots, snapshot);
+
 	Assert(snapshot != InvalidSnapshot);
 	Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
 
@@ -695,7 +723,9 @@ PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
 	 */
 	if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
 		!snapshot->copied)
+	{
 		newactive->as_snap = CopySnapshot(snapshot);
+	}
 	else
 		newactive->as_snap = snapshot;
 
@@ -828,6 +858,8 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
 	if (snapshot == InvalidSnapshot)
 		return InvalidSnapshot;
 
+	dangling_snapshots = list_delete_ptr(dangling_snapshots, snapshot);
+
 	/* Static snapshot?  Create a persistent copy */
 	snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
 
@@ -1019,6 +1051,11 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
 	}
 	FirstXactSnapshot = NULL;
 
+	foreach_ptr (Snapshot, snapshot, dangling_snapshots)
+	{
+		elog(PANIC, "had a dangling snapshot %p", snapshot);
+	}
+
 	/*
 	 * If we exported any snapshots, clean them up.
 	 */
-- 
2.39.5

