From 59783c7585e36043ebb2384937226c054d42c766 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 18 Mar 2025 14:40:06 -0400
Subject: [PATCH v2.10 02/28] bufmgr: Improve stats when buffer is read in
 concurrently

Previously we would count buffers that were read in concurently as being read
by the current backend, which leads to double-counting the read IOs
globally. Similarly, the buffer hit in that case was never attributed to
pg_stat_io and relation-level IO statistics, making cache hit ratios look
worse.  We also did not account for the cache hit in vacuum cost balancing.

This behaviour has historically grown, but there doesn't seem to be any reason
to just continue down that path.
---
 src/backend/storage/buffer/bufmgr.c | 41 ++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 79ca9d18d07..e76ff1f3224 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1433,19 +1433,6 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 		io_object = IOOBJECT_RELATION;
 	}
 
-	/*
-	 * We count all these blocks as read by this backend.  This is traditional
-	 * behavior, but might turn out to be not true if we find that someone
-	 * else has beaten us and completed the read of some of these blocks.  In
-	 * that case the system globally double-counts, but we traditionally don't
-	 * count this as a "hit", and we don't have a separate counter for "miss,
-	 * but another backend completed the read".
-	 */
-	if (persistence == RELPERSISTENCE_TEMP)
-		pgBufferUsage.local_blks_read += nblocks;
-	else
-		pgBufferUsage.shared_blks_read += nblocks;
-
 	for (int i = 0; i < nblocks; ++i)
 	{
 		int			io_buffers_len;
@@ -1463,8 +1450,13 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 		if (!WaitReadBuffersCanStartIO(buffers[i], false))
 		{
 			/*
-			 * Report this as a 'hit' for this backend, even though it must
-			 * have started out as a miss in PinBufferForBlock().
+			 * Report and track this as a 'hit' for this backend, even though
+			 * it must have started out as a miss in PinBufferForBlock().
+			 *
+			 * Some of the accesses would otherwise never be counted (e.g.
+			 * pgBufferUsage) or counted as a miss (e.g.
+			 * pgstat_count_buffer_hit(), as we always call
+			 * pgstat_count_buffer_read()).
 			 */
 			TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i,
 											  operation->smgr->smgr_rlocator.locator.spcOid,
@@ -1472,6 +1464,20 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 											  operation->smgr->smgr_rlocator.locator.relNumber,
 											  operation->smgr->smgr_rlocator.backend,
 											  true);
+
+			if (persistence == RELPERSISTENCE_TEMP)
+				pgBufferUsage.local_blks_hit += 1;
+			else
+				pgBufferUsage.shared_blks_hit += 1;
+
+			if (operation->rel)
+				pgstat_count_buffer_hit(operation->rel);
+
+			pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
+
+			if (VacuumCostActive)
+				VacuumCostBalance += VacuumCostPageHit;
+
 			continue;
 		}
 
@@ -1557,6 +1563,11 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 											  false);
 		}
 
+		if (persistence == RELPERSISTENCE_TEMP)
+			pgBufferUsage.local_blks_read += io_buffers_len;
+		else
+			pgBufferUsage.shared_blks_read += io_buffers_len;
+
 		if (VacuumCostActive)
 			VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
 	}
-- 
2.48.1.76.g4e746b1a31.dirty

