From 4d6479b37e60015cc4cf55579a8ec51337675996 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andreas@proxel.se>
Date: Fri, 21 Nov 2025 10:15:06 +0100
Subject: [PATCH v4] Do not lock in BufferGetLSNAtomic() on archs with 8 byte
 atomic reads

On platforms where we can read or write the whole LSN atomically, we do
not need to lock the buffer header to prevent torn LSNs. We can do this
only on platforms with PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY, and when the
pd_lsn field is properly aligned.

For historical reasons the PageXLogRecPtr was defined as a struct with
two uint32 fields. This replaces it with a single uint64 value, to make
the intent clearer.

Idea by Andres Freund. Initial patch by Andreas Karlsson, improved by
Peter Geoghegan. Minor tweaks by me.

Author: Andreas Karlsson <andreas@proxel.se>
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/b6610c3b-3f59-465a-bdbb-8e9259f0abc4@proxel.se
---
 contrib/pageinspect/heapfuncs.c     | 13 +++---------
 contrib/pageinspect/rawpage.c       |  8 +++-----
 src/backend/access/common/bufmask.c |  2 +-
 src/backend/storage/buffer/bufmgr.c | 23 ++++++++++++++-------
 src/include/access/gist.h           |  2 +-
 src/include/storage/bufpage.h       | 32 ++++++++++++++++-------------
 6 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 8e31632ce0e..3a61954e1d9 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -32,6 +32,7 @@
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "pageinspect.h"
 #include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -132,25 +133,17 @@ heap_page_items(PG_FUNCTION_ARGS)
 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
 	heap_page_items_state *inter_call_data = NULL;
 	FuncCallContext *fctx;
-	int			raw_page_size;
 
 	if (!superuser())
 		ereport(ERROR,
 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 				 errmsg("must be superuser to use raw page functions")));
 
-	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
-
 	if (SRF_IS_FIRSTCALL())
 	{
 		TupleDesc	tupdesc;
 		MemoryContext mctx;
 
-		if (raw_page_size < SizeOfPageHeaderData)
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("input page too small (%d bytes)", raw_page_size)));
-
 		fctx = SRF_FIRSTCALL_INIT();
 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
 
@@ -163,7 +156,7 @@ heap_page_items(PG_FUNCTION_ARGS)
 		inter_call_data->tupd = tupdesc;
 
 		inter_call_data->offset = FirstOffsetNumber;
-		inter_call_data->page = VARDATA(raw_page);
+		inter_call_data->page = get_page_from_raw(raw_page);
 
 		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
 		fctx->user_fctx = inter_call_data;
@@ -209,7 +202,7 @@ heap_page_items(PG_FUNCTION_ARGS)
 		if (ItemIdHasStorage(id) &&
 			lp_len >= MinHeapTupleSize &&
 			lp_offset == MAXALIGN(lp_offset) &&
-			lp_offset + lp_len <= raw_page_size)
+			lp_offset + lp_len <= BLCKSZ)
 		{
 			HeapTupleHeader tuphdr;
 
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
index af54afe5b09..86fe245cac5 100644
--- a/contrib/pageinspect/rawpage.c
+++ b/contrib/pageinspect/rawpage.c
@@ -208,11 +208,9 @@ get_raw_page_internal(text *relname, ForkNumber forknum, BlockNumber blkno)
  * Get a palloc'd, maxalign'ed page image from the result of get_raw_page()
  *
  * On machines with MAXALIGN = 8, the payload of a bytea is not maxaligned,
- * since it will start 4 bytes into a palloc'd value.  On alignment-picky
- * machines, this will cause failures in accesses to 8-byte-wide values
- * within the page.  We don't need to worry if accessing only 4-byte or
- * smaller fields, but when examining a struct that contains 8-byte fields,
- * use this function for safety.
+ * since it will start 4 bytes into a palloc'd value.  PageHeaderData requires
+ * 8 byte alignment, so always use this function when accessing page header
+ * fields from a raw page bytea.
  */
 Page
 get_page_from_raw(bytea *raw_page)
diff --git a/src/backend/access/common/bufmask.c b/src/backend/access/common/bufmask.c
index 1a9e7bea5d2..79c85876c58 100644
--- a/src/backend/access/common/bufmask.c
+++ b/src/backend/access/common/bufmask.c
@@ -32,7 +32,7 @@ mask_page_lsn_and_checksum(Page page)
 {
 	PageHeader	phdr = (PageHeader) page;
 
-	PageXLogRecPtrSet(phdr->pd_lsn, (uint64) MASK_MARKER);
+	phdr->pd_lsn = PageXLogRecPtrGet((uint64) MASK_MARKER);
 	phdr->pd_checksum = MASK_MARKER;
 }
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 5f3d083e938..efcf64169aa 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4628,33 +4628,42 @@ BufferIsPermanent(Buffer buffer)
 
 /*
  * BufferGetLSNAtomic
- *		Retrieves the LSN of the buffer atomically using a buffer header lock.
- *		This is necessary for some callers who may not have an exclusive lock
- *		on the buffer.
+ *		Retrieves the LSN of the buffer atomically.
+ *
+ *		On platforms without 8 byte atomic reads/write we need to take a
+ *		header lock. This is necessary for some callers who may not have an
+ *		exclusive lock on the buffer.
  */
 XLogRecPtr
 BufferGetLSNAtomic(Buffer buffer)
 {
+#ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
+	Assert(BufferIsValid(buffer));
+	Assert(BufferIsPinned(buffer));
+
+	return PageGetLSN(BufferGetPage(buffer));
+#else
 	char	   *page = BufferGetPage(buffer);
 	BufferDesc *bufHdr;
 	XLogRecPtr	lsn;
 
+	/* Make sure we've got a real buffer, and that we hold a pin on it. */
+	Assert(BufferIsValid(buffer));
+	Assert(BufferIsPinned(buffer));
+
 	/*
 	 * If we don't need locking for correctness, fastpath out.
 	 */
 	if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
 		return PageGetLSN(page);
 
-	/* Make sure we've got a real buffer, and that we hold a pin on it. */
-	Assert(BufferIsValid(buffer));
-	Assert(BufferIsPinned(buffer));
-
 	bufHdr = GetBufferDescriptor(buffer - 1);
 	LockBufHdr(bufHdr);
 	lsn = PageGetLSN(page);
 	UnlockBufHdr(bufHdr);
 
 	return lsn;
+#endif
 }
 
 /* ---------------------------------------------------------------------
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 8fa30126f8f..38facdb0196 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -187,7 +187,7 @@ typedef struct GISTENTRY
 #define GistClearFollowRight(page)	( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT)
 
 #define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn))
-#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val))
+#define GistPageSetNSN(page, val) ( GistPageGetOpaque(page)->nsn = PageXLogRecPtrGet(val))
 
 
 /*
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 92a6bb9b0c0..e994526ca52 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -91,24 +91,27 @@ typedef uint16 LocationIndex;
 
 
 /*
- * For historical reasons, the 64-bit LSN value is stored as two 32-bit
- * values.
+ * For historical reasons, the storage of 64-bit LSN values depends on CPU
+ * endianness; PageXLogRecPtr used to be a struct consisting of two 32-bit
+ * values.  When reading (and writing) the pd_lsn field from page headers, the
+ * caller must convert from (and convert to) the platform's native endianness.
  */
-typedef struct
-{
-	uint32		xlogid;			/* high bits */
-	uint32		xrecoff;		/* low bits */
-} PageXLogRecPtr;
+typedef uint64 PageXLogRecPtr;
 
-static inline XLogRecPtr
-PageXLogRecPtrGet(PageXLogRecPtr val)
+/*
+ * Convert a  pd_lsn taken from a page header into its native
+ * uint64/PageXLogRecPtr representation
+ */
+static inline PageXLogRecPtr
+PageXLogRecPtrGet(PageXLogRecPtr pd_lsn)
 {
-	return (uint64) val.xlogid << 32 | val.xrecoff;
+#ifdef WORDS_BIGENDIAN
+	return pd_lsn;
+#else
+	return (pd_lsn << 32) | (pd_lsn >> 32);
+#endif
 }
 
-#define PageXLogRecPtrSet(ptr, lsn) \
-	((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
-
 /*
  * disk page organization
  *
@@ -387,10 +390,11 @@ PageGetLSN(const PageData *page)
 {
 	return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn);
 }
+
 static inline void
 PageSetLSN(Page page, XLogRecPtr lsn)
 {
-	PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
+	((PageHeader) page)->pd_lsn = PageXLogRecPtrGet(lsn);
 }
 
 static inline bool
-- 
2.53.0

