From 70dc0fe41f0aa6afb15358b6362900259e3d08a3 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 19 Mar 2026 21:27:51 +0200
Subject: [PATCH 1/1] Fix multixact backwards-compatibility with CHECKPOINT
 race condition

If a CHECKPOINT record with nextMulti N is written to the WAL before
the CREATE_ID record for N, and N happens to be the first multixid on
an offset page, the backwards compatibility logic to tolerate WAL
generated by older minor versions failed to compensate for the missing
XLOG_MULTIXACT_ZERO_OFF_PAGE record. In that case, the
latest_page_number was initialized at the start of WAL replay to the
page for nextMulti from the CHECKPOINT record, even if we had not seen
the CREATE_ID record for that multixid yet, which fooled the backwards
compatibility logic into thinking that the page was already initialized.

To fix, track the last XLOG_MULTIXACT_ZERO_OFF_PAGE that we've seen
separately from latest_page_number. If we haven't seen any
XLOG_MULTIXACT_ZERO_OFF_PAGE records yet, use
SimpleLruDoesPhysicalPageExist() to check if the page needs to be
initialized.
---
 src/backend/access/transam/multixact.c | 65 +++++++++++++++++++-------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index f9bd1dd19e6..5dcb7f9068f 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -417,7 +417,17 @@ static MemoryContext MXactContext = NULL;
 #define debug_elog6(a,b,c,d,e,f)
 #endif
 
-/* hack to deal with WAL generated with older minor versions */
+/*
+ * Hack to deal with WAL generated with older minor versions.
+ *
+ * last_initialized_offsets_page is the page initialized by the last
+ * XLOG_MULTIXACT_ZERO_OFF_PAGE record seen during WAL replay, or -1 if none.
+ *
+ * pre_initialized_offsets_page is the last page that was implicitly
+ * initialized by replaying a XLOG_MULTIXACT_CREATE_ID record, when we had not
+ * seen a XLOG_MULTIXACT_ZERO_OFF_PAGE record for the page yet.
+ */
+static int64 last_initialized_offsets_page = -1;
 static int64 pre_initialized_offsets_page = -1;
 
 /* internal MultiXactId management */
@@ -982,29 +992,46 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 	 * such a version, the next page might not be initialized yet.  Initialize
 	 * it now.
 	 */
-	if (InRecovery &&
-		next_pageno != pageno &&
-		pg_atomic_read_u64(&MultiXactOffsetCtl->shared->latest_page_number) == pageno)
+	if (InRecovery && next_pageno != pageno)
 	{
-		elog(DEBUG1, "next offsets page is not initialized, initializing it now");
+		bool		init_needed;
 
-		lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
-		LWLockAcquire(lock, LW_EXCLUSIVE);
+		/*
+		 * Check if the page exists. SimpleLruDoesPhysicalPageExist() is
+		 * somewhat expensive, however, so if we have seen any
+		 * XLOG_MULTIXACT_ZERO_OFF_PAGE records during replay, we trust
+		 * without checking that the last page initialized by
+		 * XLOG_MULTIXACT_ZERO_OFF_PAGE already exists, and the one after that
+		 * does not.
+		 */
+		if (last_initialized_offsets_page == -1)
+			init_needed = !SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, next_pageno);
+		else
+			init_needed = (last_initialized_offsets_page == pageno);
 
-		/* Create and zero the page */
-		slotno = SimpleLruZeroPage(MultiXactOffsetCtl, next_pageno);
+		if (init_needed)
+		{
+			elog(DEBUG1, "next offsets page is not initialized, initializing it now");
 
-		/* Make sure it's written out */
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-		Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+			lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
+			LWLockAcquire(lock, LW_EXCLUSIVE);
 
-		LWLockRelease(lock);
+			/* Create and zero the page */
+			slotno = SimpleLruZeroPage(MultiXactOffsetCtl, next_pageno);
 
-		/*
-		 * Remember that we initialized the page, so that we don't zero it
-		 * again at the XLOG_MULTIXACT_ZERO_OFF_PAGE record.
-		 */
-		pre_initialized_offsets_page = next_pageno;
+			/* Make sure it's written out */
+			SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+			Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
+
+			LWLockRelease(lock);
+
+			/*
+			 * Remember that we initialized the page, so that we don't zero it
+			 * again at the XLOG_MULTIXACT_ZERO_OFF_PAGE record.
+			 */
+			pre_initialized_offsets_page = next_pageno;
+			last_initialized_offsets_page = next_pageno;
+		}
 	}
 
 	/*
@@ -3560,6 +3587,8 @@ multixact_redo(XLogReaderState *record)
 			Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
 
 			LWLockRelease(lock);
+
+			last_initialized_offsets_page = pageno;
 		}
 		else
 			elog(DEBUG1, "skipping initialization of offsets page " INT64_FORMAT " because it was already initialized on multixid creation", pageno);
-- 
2.47.3

