From 8195dd714cfa14ffd68a04a36ac5496eba421897 Mon Sep 17 00:00:00 2001
From: Greg Burd <greg@burd.me>
Date: Fri, 25 Jul 2025 11:53:10 -0400
Subject: [PATCH v10 3/3] Abstract clock-sweep buffer replacement algorithm

Re-author the clock-sweep algorithm such that it maintains its own state
and has a well defined API.
---
 src/backend/storage/buffer/README     | 20 +++---
 src/backend/storage/buffer/freelist.c | 88 ++++++++++++++-------------
 src/tools/pgindent/typedefs.list      |  1 +
 3 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README
index d1ab222eeb8..3f31d04d572 100644
--- a/src/backend/storage/buffer/README
+++ b/src/backend/storage/buffer/README
@@ -166,14 +166,13 @@ small limit value) whenever the buffer is pinned.  (This requires only the
 buffer header spinlock, which would have to be taken anyway to increment the
 buffer reference count, so it's nearly free.)
 
-The "clock hand" is a buffer index, nextVictimBuffer, that moves circularly
-through all the available buffers.  nextVictimBuffer and completePasses are
-atomic values.
+The "clock hand" is a buffer index that moves circularly through all the
+available buffers.
 
 The algorithm for a process that needs to obtain a victim buffer is:
 
-1. Select the buffer pointed to by nextVictimBuffer, and circularly advance
-nextVictimBuffer for next time.
+1. Select the buffer pointed to by the clock hand, and circularly advance it
+for next time.
 
 2. If the selected buffer is pinned or has a nonzero usage count, it cannot be
 used.  Decrement its usage count (if nonzero), return to step 3 to examine the
@@ -235,13 +234,12 @@ Background Writer's Processing
 ------------------------------
 
 The background writer is designed to write out pages that are likely to be
-recycled soon, thereby offloading the writing work from active backends.
-To do this, it scans forward circularly from the current position of
-nextVictimBuffer (which it does not change!), looking for buffers that are
-dirty and not pinned nor marked with a positive usage count.  It pins,
-writes, and releases any such buffer.
+recycled soon, thereby offloading the writing work from active backends. To do
+this, it scans forward circularly from the current position of clock (which it
+does not change!), looking for buffers that are dirty and not pinned nor marked
+with a positive usage count.  It pins, writes, and releases any such buffer.
 
-We enforce reading nextVictimBuffer within an atomic action so it needs only to
+We enforce reading the clock hand within an atomic action so it needs only to
 spinlock each buffer header for long enough to check the dirtybit.  Even
 without that assumption, the writer only needs to take the lock long enough to
 read the variable value, not while scanning the buffers. (This is a very
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 906b35be4c1..71839dfdee9 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -23,19 +23,22 @@
 
 #define INT_ACCESS_ONCE(var)	((int)(*((volatile int *)&(var))))
 
+typedef struct ClockSweep
+{
+	pg_atomic_uint64 counter;	/* Only incremented by one */
+	uint32_t	size;			/* Size of the clock */
+} ClockSweep;
 
 /*
  * The shared freelist control information.
  */
-typedef struct {
+typedef struct
+{
 	/*
-	 * The clock-sweep hand is atomically updated by 1 at every tick.  Use the
-	 * macro CLOCK_HAND_POSITION() o find the next victim's index in the
-	 * BufferDescriptor array. To calculate the number of times the clock-sweep
-	 * hand has made a complete pass through all available buffers in the pool
-	 * divide NBuffers.
+	 * The next buffer available for use is determined by the clock-sweep
+	 * algorithm.
 	 */
-	pg_atomic_uint64 nextVictimBuffer;
+	ClockSweep	clock;
 
 	/*
 	 * Statistics.  These counters should be wide enough that they can't
@@ -86,32 +89,40 @@ static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
 static void AddBufferToRing(BufferAccessStrategy strategy,
 							BufferDesc *buf);
 
-#define CLOCK_HAND_POSITION(counter) \
-	((counter) & 0xFFFFFFFF) % NBuffers
+static void
+ClockSweepInit(ClockSweep *sweep, uint32 size)
+{
+	pg_atomic_init_u64(&sweep->counter, 0);
+	sweep->size = size;
+}
 
-/*
- * ClockSweepTick - Helper routine for StrategyGetBuffer()
- *
- * Move the clock hand one buffer ahead of its current position and return the
- * id of the buffer now under the hand.
- */
+/* Extract the number of complete cycles from the clock hand */
 static inline uint32
-ClockSweepTick(void)
+ClockSweepCycles(ClockSweep *sweep)
 {
-	uint64		hand = UINT64_MAX;
-	uint32		victim;
+	uint64		current = pg_atomic_read_u64(&sweep->counter);
 
-	/*
-	 * Atomically move hand ahead one buffer - if there's several processes
-	 * doing this, this can lead to buffers being returned slightly out of
-	 * apparent order.
-	 */
-	hand = pg_atomic_fetch_add_u64(&StrategyControl->nextVictimBuffer, 1);
+	return current / sweep->size;
+}
+
+/* Return the current position of the clock's hand modulo size */
+static inline uint32
+ClockSweepPosition(ClockSweep *sweep)
+{
+	uint64		counter = pg_atomic_read_u64(&sweep->counter);
+
+	return ((counter) & 0xFFFFFFFF) % sweep->size;
+}
 
-	victim = CLOCK_HAND_POSITION(hand);
-	Assert(victim < NBuffers);
+/*
+ * Move the clock hand ahead one and return its new position.
+ */
+static inline uint32
+ClockSweepTick(ClockSweep *sweep)
+{
+	uint64		counter = pg_atomic_fetch_add_u64(&sweep->counter, 1);
 
-	return victim;
+	return ((counter) & 0xFFFFFFFF) % sweep->size;
 }
 
 /*
@@ -181,11 +192,11 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
 	 */
 	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
 
-	/* Use the "clock-sweep" algorithm to find a free buffer */
+	/* Use the clock-sweep algorithm to find a free buffer */
 	trycounter = NBuffers;
 	for (;;)
 	{
-		buf = GetBufferDescriptor(ClockSweepTick());
+		buf = GetBufferDescriptor(ClockSweepTick(&StrategyControl->clock));
 
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -236,19 +247,14 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
  * buffer allocs if non-NULL pointers are passed.  The alloc count is reset
  * after being read.
  */
-uint32 StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc) {
-	uint64		counter = UINT64_MAX; uint32		result;
-
-	counter = pg_atomic_read_u64(&StrategyControl->nextVictimBuffer);
-	result = CLOCK_HAND_POSITION(counter);
+uint32
+StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
+{
+	uint32		result = ClockSweepPosition(&StrategyControl->clock);
 
 	if (complete_passes)
 	{
-		/*
-		 * The number of complete passes is the counter divided by NBuffers
-		 * because the clock hand is a 64-bit counter that only increases.
-		 */
-		*complete_passes = (uint32) (counter / NBuffers);
+		*complete_passes = ClockSweepCycles(&StrategyControl->clock);
 	}
 
 	if (num_buf_alloc)
@@ -335,8 +341,8 @@ StrategyInitialize(bool init)
 		 */
 		Assert(init);
 
-		/* Initialize combined clock-sweep pointer/complete passes counter */
-		pg_atomic_init_u64(&StrategyControl->nextVictimBuffer, 0);
+		/* Initialize the clock-sweep algorithm */
+		ClockSweepInit(&StrategyControl->clock, NBuffers);
 
 		/* Clear statistics */
 		pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4353befab99..1cbfca592d9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -426,6 +426,7 @@ ClientCertName
 ClientConnectionInfo
 ClientData
 ClientSocket
+ClockSweep
 ClonePtrType
 ClosePortalStmt
 ClosePtrType
-- 
2.49.0

