From 3228848876610c7b13216ffca6b42a9f5465e300 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 13 Mar 2025 16:45:12 +0200
Subject: [PATCH v6 02/12] Simplify historic snapshot refcounting

ReorderBufferProcessTXN() handled "copied" snapshots created with
ReorderBufferCopySnap() differently from "base" historic snapshots
created by snapbuild.c. The base snapshots used a reference count,
while copied snapshots did not. Simplify by using the reference count
for both.
---
 .../replication/logical/reorderbuffer.c       | 97 ++++++++-----------
 src/backend/replication/logical/snapbuild.c   | 48 +--------
 src/include/replication/snapbuild.h           |  1 +
 src/include/utils/snapshot.h                  |  2 -
 4 files changed, 46 insertions(+), 102 deletions(-)

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index e8196a8d5d5..e47970f1c82 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -103,7 +103,7 @@
 #include "replication/logical.h"
 #include "replication/reorderbuffer.h"
 #include "replication/slot.h"
-#include "replication/snapbuild.h"	/* just for SnapBuildSnapDecRefcount */
+#include "replication/snapbuild.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
 #include "storage/procarray.h"
@@ -268,7 +268,6 @@ static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot,
 										TransactionId xid, XLogSegNo segno);
 static int	ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg);
 
-static void ReorderBufferFreeSnap(ReorderBuffer *rb, HistoricMVCCSnapshot snap);
 static HistoricMVCCSnapshot ReorderBufferCopySnap(ReorderBuffer *rb, HistoricMVCCSnapshot orig_snap,
 												  ReorderBufferTXN *txn, CommandId cid);
 
@@ -543,7 +542,7 @@ ReorderBufferFreeChange(ReorderBuffer *rb, ReorderBufferChange *change,
 		case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
 			if (change->data.snapshot)
 			{
-				ReorderBufferFreeSnap(rb, change->data.snapshot);
+				SnapBuildSnapDecRefcount(change->data.snapshot);
 				change->data.snapshot = NULL;
 			}
 			break;
@@ -1593,7 +1592,8 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
 	if (txn->snapshot_now != NULL)
 	{
 		Assert(rbtxn_is_streamed(txn));
-		ReorderBufferFreeSnap(rb, txn->snapshot_now);
+		SnapBuildSnapDecRefcount(txn->snapshot_now);
+		txn->snapshot_now = NULL;
 	}
 
 	/*
@@ -1902,7 +1902,6 @@ ReorderBufferCopySnap(ReorderBuffer *rb, HistoricMVCCSnapshot orig_snap,
 	snap = MemoryContextAllocZero(rb->context, size);
 	memcpy(snap, orig_snap, sizeof(HistoricMVCCSnapshotData));
 
-	snap->copied = true;
 	snap->refcount = 1;			/* mark as active so nobody frees it */
 	snap->regd_count = 0;
 	snap->committed_xids = (TransactionId *) (snap + 1);
@@ -1942,18 +1941,6 @@ ReorderBufferCopySnap(ReorderBuffer *rb, HistoricMVCCSnapshot orig_snap,
 	return snap;
 }
 
-/*
- * Free a previously ReorderBufferCopySnap'ed snapshot
- */
-static void
-ReorderBufferFreeSnap(ReorderBuffer *rb, HistoricMVCCSnapshot snap)
-{
-	if (snap->copied)
-		pfree(snap);
-	else
-		SnapBuildSnapDecRefcount(snap);
-}
-
 /*
  * If the transaction was (partially) streamed, we need to prepare or commit
  * it in a 'streamed' way.  That is, we first stream the remaining part of the
@@ -2104,11 +2091,8 @@ ReorderBufferSaveTXNSnapshot(ReorderBuffer *rb, ReorderBufferTXN *txn,
 	txn->command_id = command_id;
 
 	/* Avoid copying if it's already copied. */
-	if (snapshot_now->copied)
-		txn->snapshot_now = snapshot_now;
-	else
-		txn->snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
-												  txn, command_id);
+	txn->snapshot_now = snapshot_now;
+	SnapBuildSnapIncRefcount(txn->snapshot_now);
 }
 
 /*
@@ -2208,6 +2192,8 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 
 	/* setup the initial snapshot */
 	SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
+	/* increase refcount for the installed historic snapshot */
+	SnapBuildSnapIncRefcount(snapshot_now);
 
 	/*
 	 * Decoding needs access to syscaches et al., which in turn use
@@ -2511,33 +2497,12 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 				case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
 					/* get rid of the old */
 					TeardownHistoricSnapshot(false);
-
-					if (snapshot_now->copied)
-					{
-						ReorderBufferFreeSnap(rb, snapshot_now);
-						snapshot_now =
-							ReorderBufferCopySnap(rb, change->data.snapshot,
-												  txn, command_id);
-					}
-
-					/*
-					 * Restored from disk, need to be careful not to double
-					 * free. We could introduce refcounting for that, but for
-					 * now this seems infrequent enough not to care.
-					 */
-					else if (change->data.snapshot->copied)
-					{
-						snapshot_now =
-							ReorderBufferCopySnap(rb, change->data.snapshot,
-												  txn, command_id);
-					}
-					else
-					{
-						snapshot_now = change->data.snapshot;
-					}
+					SnapBuildSnapDecRefcount(snapshot_now);
 
 					/* and continue with the new one */
+					snapshot_now = change->data.snapshot;
 					SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
+					SnapBuildSnapIncRefcount(snapshot_now);
 					break;
 
 				case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
@@ -2547,16 +2512,26 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 					{
 						command_id = change->data.command_id;
 
-						if (!snapshot_now->copied)
+						TeardownHistoricSnapshot(false);
+
+						/*
+						 * Construct a new snapshot with the new command ID.
+						 *
+						 * If this is the only reference to the snapshot, and
+						 * it's a "copied" snapshot that already contains all
+						 * the replayed transaction's XIDs (curxnct > 0), we
+						 * can take a shortcut and update the snapshot's
+						 * command ID in place.
+						 */
+						if (snapshot_now->refcount == 1 && snapshot_now->curxcnt > 0)
+							snapshot_now->curcid = command_id;
+						else
 						{
-							/* we don't use the global one anymore */
+							SnapBuildSnapDecRefcount(snapshot_now);
 							snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
 																 txn, command_id);
 						}
 
-						snapshot_now->curcid = command_id;
-
-						TeardownHistoricSnapshot(false);
 						SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
 					}
 
@@ -2646,11 +2621,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 		 */
 		if (streaming)
 			ReorderBufferSaveTXNSnapshot(rb, txn, snapshot_now, command_id);
-		else if (snapshot_now->copied)
-			ReorderBufferFreeSnap(rb, snapshot_now);
 
 		/* cleanup */
 		TeardownHistoricSnapshot(false);
+		SnapBuildSnapDecRefcount(snapshot_now);
+		snapshot_now = NULL;
 
 		/*
 		 * Aborting the current (sub-)transaction as a whole has the right
@@ -2703,6 +2678,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 
 		TeardownHistoricSnapshot(true);
 
+		/*
+		 * don't decrement the refcount on snapshot_now yet, we still use it
+		 * in the ReorderBufferResetTXN() call below.
+		 */
+
 		/*
 		 * Force cache invalidation to happen outside of a valid transaction
 		 * to prevent catalog access as we just caught an error.
@@ -2751,9 +2731,15 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 			ReorderBufferResetTXN(rb, txn, snapshot_now,
 								  command_id, prev_lsn,
 								  specinsert);
+
+			SnapBuildSnapDecRefcount(snapshot_now);
+			snapshot_now = NULL;
 		}
 		else
 		{
+			SnapBuildSnapDecRefcount(snapshot_now);
+			snapshot_now = NULL;
+
 			ReorderBufferCleanupTXN(rb, txn);
 			MemoryContextSwitchTo(ecxt);
 			PG_RE_THROW();
@@ -4256,8 +4242,7 @@ ReorderBufferStreamTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
 											 txn, command_id);
 
 		/* Free the previously copied snapshot. */
-		Assert(txn->snapshot_now->copied);
-		ReorderBufferFreeSnap(rb, txn->snapshot_now);
+		SnapBuildSnapDecRefcount(txn->snapshot_now);
 		txn->snapshot_now = NULL;
 	}
 
@@ -4647,7 +4632,7 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
 				newsnap->committed_xids = (TransactionId *)
 					(((char *) newsnap) + sizeof(HistoricMVCCSnapshotData));
 				newsnap->curxip = newsnap->committed_xids + newsnap->xcnt;
-				newsnap->copied = true;
+				newsnap->refcount = 1;
 				break;
 			}
 			/* the base struct contains all the data, easy peasy */
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 7a341418a74..50dca7cb758 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -157,10 +157,6 @@ static void SnapBuildPurgeOlderTxn(SnapBuild *builder);
 /* snapshot building/manipulation/distribution functions */
 static HistoricMVCCSnapshot SnapBuildBuildSnapshot(SnapBuild *builder);
 
-static void SnapBuildFreeSnapshot(HistoricMVCCSnapshot snap);
-
-static void SnapBuildSnapIncRefcount(HistoricMVCCSnapshot snap);
-
 static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn);
 
 static inline bool SnapBuildXidHasCatalogChanges(SnapBuild *builder, TransactionId xid,
@@ -245,29 +241,6 @@ FreeSnapshotBuilder(SnapBuild *builder)
 	MemoryContextDelete(context);
 }
 
-/*
- * Free an unreferenced snapshot that has previously been built by us.
- */
-static void
-SnapBuildFreeSnapshot(HistoricMVCCSnapshot snap)
-{
-	/* make sure we don't get passed an external snapshot */
-	Assert(snap->snapshot_type == SNAPSHOT_HISTORIC_MVCC);
-
-	/* make sure nobody modified our snapshot */
-	Assert(snap->curcid == FirstCommandId);
-	Assert(snap->regd_count == 0);
-
-	/* slightly more likely, so it's checked even without c-asserts */
-	if (snap->copied)
-		elog(ERROR, "cannot free a copied snapshot");
-
-	if (snap->refcount)
-		elog(ERROR, "cannot free a snapshot that's in use");
-
-	pfree(snap);
-}
-
 /*
  * In which state of snapshot building are we?
  */
@@ -310,7 +283,7 @@ SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
  * This is used when handing out a snapshot to some external resource or when
  * adding a Snapshot as builder->snapshot.
  */
-static void
+void
 SnapBuildSnapIncRefcount(HistoricMVCCSnapshot snap)
 {
 	snap->refcount++;
@@ -318,9 +291,6 @@ SnapBuildSnapIncRefcount(HistoricMVCCSnapshot snap)
 
 /*
  * Decrease refcount of a snapshot and free if the refcount reaches zero.
- *
- * Externally visible, so that external resources that have been handed an
- * IncRef'ed Snapshot can adjust its refcount easily.
  */
 void
 SnapBuildSnapDecRefcount(HistoricMVCCSnapshot snap)
@@ -328,19 +298,12 @@ SnapBuildSnapDecRefcount(HistoricMVCCSnapshot snap)
 	/* make sure we don't get passed an external snapshot */
 	Assert(snap->snapshot_type == SNAPSHOT_HISTORIC_MVCC);
 
-	/* make sure nobody modified our snapshot */
-	Assert(snap->curcid == FirstCommandId);
-
 	Assert(snap->refcount > 0);
 	Assert(snap->regd_count == 0);
 
-	/* slightly more likely, so it's checked even without casserts */
-	if (snap->copied)
-		elog(ERROR, "cannot free a copied snapshot");
-
 	snap->refcount--;
 	if (snap->refcount == 0)
-		SnapBuildFreeSnapshot(snap);
+		pfree(snap);
 }
 
 /*
@@ -413,7 +376,6 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
 	snapshot->curxcnt = 0;
 	snapshot->curxip = NULL;
 
-	snapshot->copied = false;
 	snapshot->curcid = FirstCommandId;
 	snapshot->refcount = 0;
 	snapshot->regd_count = 0;
@@ -1037,18 +999,16 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
 			SnapBuildSnapDecRefcount(builder->snapshot);
 
 		builder->snapshot = SnapBuildBuildSnapshot(builder);
+		SnapBuildSnapIncRefcount(builder->snapshot);
 
 		/* we might need to execute invalidations, add snapshot */
 		if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
 		{
-			SnapBuildSnapIncRefcount(builder->snapshot);
 			ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
 										 builder->snapshot);
+			SnapBuildSnapIncRefcount(builder->snapshot);
 		}
 
-		/* refcount of the snapshot builder for the new snapshot */
-		SnapBuildSnapIncRefcount(builder->snapshot);
-
 		/* add a new catalog snapshot to all currently running transactions */
 		SnapBuildDistributeNewCatalogSnapshot(builder, lsn);
 	}
diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h
index 5930ffb55a8..6095013a299 100644
--- a/src/include/replication/snapbuild.h
+++ b/src/include/replication/snapbuild.h
@@ -70,6 +70,7 @@ extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *reorder,
 										  XLogRecPtr two_phase_at);
 extern void FreeSnapshotBuilder(SnapBuild *builder);
 
+extern void SnapBuildSnapIncRefcount(HistoricMVCCSnapshot snap);
 extern void SnapBuildSnapDecRefcount(HistoricMVCCSnapshot snap);
 
 extern MVCCSnapshot SnapBuildInitialSnapshot(SnapBuild *builder);
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 93c1f51784f..bca0ad16e68 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -218,8 +218,6 @@ typedef struct HistoricMVCCSnapshotData
 
 	CommandId	curcid;			/* in my xact, CID < curcid are visible */
 
-	bool		copied;			/* false if it's a "base" snapshot */
-
 	uint32		refcount;		/* refcount managed by snapbuild.c  */
 	uint32		regd_count;		/* refcount registered with resource owners */
 
-- 
2.39.5

