From 037f1af8ade2235f07edf0dd84f2519e6d92a5f7 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 19 Nov 2024 12:08:38 -0500
Subject: [PATCH 05/12] bufmgr: Separate slow/fast path of LockBufHdr

This mainly is beneficial for some upcoming patches that use LockBufHdr() in
reasonably common paths. But I do see a very small improvement when scanning a
large table fully resident in the kernel page cache.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/storage/buffer/bufmgr.c |  8 ++++---
 src/include/storage/buf_internals.h | 33 ++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 5187f261423..dd61ac547b4 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6215,10 +6215,12 @@ rlocator_comparator(const void *p1, const void *p2)
 }
 
 /*
- * Lock buffer header - set BM_LOCKED in buffer state.
+ * Slow path helper for LockBufHdr().
+ *
+ * See comment in LockBufHdr() for the reason this is moved out of line.
  */
-uint32
-LockBufHdr(BufferDesc *desc)
+pg_noinline uint32
+LockBufHdrSlow(BufferDesc *desc)
 {
 	SpinDelayStatus delayStatus;
 	uint32		old_buf_state;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 72b36a4af26..4972afa38fc 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -371,7 +371,38 @@ BufferDescriptorGetContentLock(const BufferDesc *bdesc)
  * Functions for acquiring/releasing a shared buffer header's spinlock.  Do
  * not apply these to local buffers!
  */
-extern uint32 LockBufHdr(BufferDesc *desc);
+extern uint32 LockBufHdrSlow(BufferDesc *desc);
+
+/*
+ * Lock buffer header - set BM_LOCKED in buffer state.
+ *
+ * Implemented as a static inline in the header due to freelist.c using it in
+ * a reasonably common path.
+ */
+static inline uint32
+LockBufHdr(BufferDesc *desc)
+{
+	uint32		old_buf_state;
+
+	Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
+
+	/* fast path, intended to be inlined into calling functions*/
+
+	/* set BM_LOCKED flag */
+	old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
+	if (likely(!(old_buf_state & BM_LOCKED)))
+		return old_buf_state | BM_LOCKED;
+
+	/*
+	 * The overhead of the spin delay code and the retry loop itself would be
+	 * noticeable due to the buffer header lock being taken very
+	 * frequently. Therefore it is moved into a separate function marked
+	 * pg_noinline.
+	 */
+
+	/* slow path with loop etc, not intended to be inlined */
+	return LockBufHdrSlow(desc);
+}
 
 static inline void
 UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
-- 
2.39.5

