From f6d9c033a1e686b1600a435c857e728f98901997 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 13 Dec 2024 14:26:07 +0200
Subject: [PATCH 2/5] Assert that a snapshot is active or registered before
 it's used

This is to catch potential bugs where a snapshot might get invalidated
while it's still in use. No such bugs were found by this, but the
advice in GetTransactionSnapshot() that you "should call
RegisterSnapshot or PushActiveSnapshot on the returned snap if it is
to be used very long" felt too unclear to me.

Fix a few cases that were playing fast and loose with that and just
assumed that the snapshot cannot be invalidated during a scan. Those
assumptions were not wrong, but they're not performance critical, so
let's drop the excuses and just register the snapshot. This allows us
to have an assertion in HeapTupleSatisfiesMVCC that the snapshot is
appropriately registered.

Adjust the comment in GetTransactionSnapshot() a little, because in a
few places we rely on the fact that GetTransactionSnapshot() returns a
statically allocated Snapshot; we'd have leaks otherwise.
---
 src/backend/access/heap/heapam_visibility.c |  9 +++++++++
 src/backend/access/index/genam.c            |  8 ++------
 src/backend/commands/dbcommands.c           |  3 ++-
 src/backend/utils/cache/relcache.c          | 15 +++++++++------
 src/backend/utils/time/snapmgr.c            |  8 ++++----
 5 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index 9243feed01f..6c0f872c730 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -962,6 +962,15 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
 {
 	HeapTupleHeader tuple = htup->t_data;
 
+	/*
+	 * Assert that the caller has registered the snapshot. This function
+	 * doesn't care about the registration as such, but in general you
+	 * shouldn't try to use a snapshot without registration because it might
+	 * get invalidated while it's still in use, and this is a convenient place
+	 * to check for that.
+	 */
+	Assert(snapshot->regd_count > 0 || snapshot->active_count > 0);
+
 	Assert(ItemPointerIsValid(&htup->t_self));
 	Assert(htup->t_tableOid != InvalidOid);
 
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 4b4ebff6a17..b12bb6c8d70 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -576,17 +576,13 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
 
 	Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
 
-	/*
-	 * Trust that table_tuple_satisfies_snapshot() and its subsidiaries
-	 * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves
-	 * acquire snapshots, so we need not register the snapshot.  Those
-	 * facilities are too low-level to have any business scanning tables.
-	 */
 	freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
+	freshsnap = RegisterSnapshot(freshsnap);
 
 	result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
 											sysscan->slot,
 											freshsnap);
+	UnregisterSnapshot(freshsnap);
 
 	/*
 	 * Handle the concurrent abort while fetching the catalog tuple during
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index aa91a396967..034c5938c66 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -288,7 +288,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
 	 * snapshot - or the active snapshot - might not be new enough for that,
 	 * but the return value of GetLatestSnapshot() should work fine.
 	 */
-	snapshot = GetLatestSnapshot();
+	snapshot = RegisterSnapshot(GetLatestSnapshot());
 
 	/* Process the relation block by block. */
 	for (blkno = 0; blkno < nblocks; blkno++)
@@ -313,6 +313,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
 
 		UnlockReleaseBuffer(buf);
 	}
+	UnregisterSnapshot(snapshot);
 
 	/* Release relation lock. */
 	UnlockRelationId(&relid, AccessShareLock);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 422509f18d7..dc299ccb760 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -371,14 +371,13 @@ ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
 	pg_class_desc = table_open(RelationRelationId, AccessShareLock);
 
 	/*
-	 * The caller might need a tuple that's newer than the one the historic
-	 * snapshot; currently the only case requiring to do so is looking up the
-	 * relfilenumber of non mapped system relations during decoding. That
-	 * snapshot can't change in the midst of a relcache build, so there's no
-	 * need to register the snapshot.
+	 * The caller might need a tuple that's newer than what's visible to the
+	 * historic snapshot; currently the only case requiring to do so is
+	 * looking up the relfilenumber of non mapped system relations during
+	 * decoding.
 	 */
 	if (force_non_historic)
-		snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
+		snapshot = RegisterSnapshot(GetNonHistoricCatalogSnapshot(RelationRelationId));
 
 	pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
 									   indexOK && criticalRelcachesBuilt,
@@ -395,6 +394,10 @@ ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
 
 	/* all done */
 	systable_endscan(pg_class_scan);
+
+	if (snapshot)
+		UnregisterSnapshot(snapshot);
+
 	table_close(pg_class_desc, AccessShareLock);
 
 	return pg_class_tuple;
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index a1a0c2adeb6..3c408762728 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -203,10 +203,10 @@ typedef struct SerializedSnapshotData
  * GetTransactionSnapshot
  *		Get the appropriate snapshot for a new query in a transaction.
  *
- * Note that the return value may point at static storage that will be modified
- * by future calls and by CommandCounterIncrement().  Callers should call
- * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
- * used very long.
+ * Note that the return value points at static storage that will be modified
+ * by future calls and by CommandCounterIncrement().  Callers must call
+ * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
+ * any other non-trivial work that could invalidate it.
  */
 Snapshot
 GetTransactionSnapshot(void)
-- 
2.39.5

