From cab8e0435b2a8390887f99091b2e9d336dd353c0 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Sat, 25 Feb 2023 14:36:06 -0500
Subject: [PATCH v3 2/2] Track shared buffer hits in pg_stat_io

Count new IOOP_HITs and add "hits" column to pg_stat_io.

Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml           | 11 ++++++++
 src/backend/catalog/system_views.sql   |  1 +
 src/backend/storage/buffer/bufmgr.c    | 38 ++++++++++----------------
 src/backend/storage/buffer/localbuf.c  | 11 ++------
 src/backend/utils/activity/pgstat_io.c |  2 +-
 src/backend/utils/adt/pgstatfuncs.c    |  3 ++
 src/include/catalog/pg_proc.dat        |  6 ++--
 src/include/pgstat.h                   |  1 +
 src/include/storage/buf_internals.h    |  2 +-
 src/test/regress/expected/rules.out    |  3 +-
 src/test/regress/expected/stats.out    | 27 ++++++++++++++++--
 src/test/regress/sql/stats.sql         | 14 ++++++++--
 12 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 6249bb50d0..8b34ca60bc 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>hits</structfield> <type>bigint</type>
+       </para>
+       <para>
+        The number of times a desired block was found in a shared buffer.
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 34ca0e739f..87bbbdfcb3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1126,6 +1126,7 @@ SELECT
        b.writes,
        b.extends,
        b.op_bytes,
+       b.hits,
        b.evictions,
        b.reuses,
        b.fsyncs,
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0a05577b68..05fd3c9a2a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   ForkNumber forkNum,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
-							   bool *foundPtr, IOContext *io_context);
+							   bool *foundPtr, IOContext io_context);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
 						IOObject io_object, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (isLocalBuf)
 	{
 		/*
-		 * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
-		 * do not use a BufferAccessStrategy for I/O of temporary tables.
+		 * We do not use a BufferAccessStrategy for I/O of temporary tables.
 		 * However, in some cases, the "strategy" may not be NULL, so we can't
 		 * rely on IOContextForStrategy() to set the right IOContext for us.
 		 * This may happen in cases like CREATE TEMPORARY TABLE AS...
 		 */
-		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
+		io_context = IOCONTEXT_NORMAL;
+		io_object = IOOBJECT_TEMP_RELATION;
+		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
 		if (found)
 			pgBufferUsage.local_blks_hit++;
 		else if (isExtend)
@@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
 		 * not currently in memory.
 		 */
+		io_context = IOContextForStrategy(strategy);
+		io_object = IOOBJECT_RELATION;
 		bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
-							 strategy, &found, &io_context);
+							 strategy, &found, io_context);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
 		else if (isExtend)
@@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			/* Just need to update stats before we exit */
 			*hit = true;
 			VacuumPageHit++;
+			pgstat_count_io_op(io_object, io_context, IOOP_HIT);
 
 			if (VacuumCostActive)
 				VacuumCostBalance += VacuumCostPageHit;
@@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID));	/* spinlock not needed */
 
-	if (isLocalBuf)
-	{
-		bufBlock = LocalBufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_TEMP_RELATION;
-	}
-	else
-	{
-		bufBlock = BufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_RELATION;
-	}
+	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
 
 	if (isExtend)
 	{
@@ -1139,7 +1134,7 @@ static BufferDesc *
 BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
-			bool *foundPtr, IOContext *io_context)
+			bool *foundPtr, IOContext io_context)
 {
 	bool		from_ring;
 	BufferTag	newTag;			/* identity of requested block */
@@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			{
 				/*
 				 * If we get here, previous attempts to read the buffer must
-				 * have failed ... but we shall bravely try again. Set
-				 * io_context since we will in fact need to count an IO
-				 * Operation.
+				 * have failed ... but we shall bravely try again.
 				 */
-				*io_context = IOContextForStrategy(strategy);
 				*foundPtr = false;
 			}
 		}
@@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	LWLockRelease(newPartitionLock);
 
-	*io_context = IOContextForStrategy(strategy);
-
 	/* Loop here in case we have to try another victim buffer */
 	for (;;)
 	{
@@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rlocator.locator.dbOid,
 														  smgr->smgr_rlocator.locator.relNumber);
 
-				FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
+				FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * we may have been forced to release the buffer due to concurrent
 		 * pinners or erroring out.
 		 */
-		pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+		pgstat_count_io_op(IOOBJECT_RELATION, io_context,
 						   from_ring ? IOOP_REUSE : IOOP_EVICT);
 	}
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 5325ddb663..880c9909b9 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
  */
 BufferDesc *
 LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
-				 bool *foundPtr, IOContext *io_context)
+				 bool *foundPtr)
 {
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
@@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	hresult = (LocalBufferLookupEnt *)
 		hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);
 
-	/*
-	 * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
-	 * io_context here (instead of after a buffer hit would have returned) for
-	 * convenience since we don't have to worry about the overhead of calling
-	 * IOContextForStrategy().
-	 */
-	*io_context = IOCONTEXT_NORMAL;
-
 	if (hresult)
 	{
 		b = hresult->id;
@@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
+		/* Temporary table I/O does not use Buffer Access Strategies */
 		pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
 		pgBufferUsage.local_blks_written++;
 	}
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index af5d554610..ae8bb34f78 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 	 * Some BackendTypes will not do certain IOOps.
 	 */
 	if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
-		(io_op == IOOP_READ || io_op == IOOP_EVICT))
+		(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
 		return false;
 
 	if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 1112bc2904..e2e79eca99 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1258,6 +1258,7 @@ typedef enum io_stat_col
 	IO_COL_WRITES,
 	IO_COL_EXTENDS,
 	IO_COL_CONVERSION,
+	IO_COL_HITS,
 	IO_COL_EVICTIONS,
 	IO_COL_REUSES,
 	IO_COL_FSYNCS,
@@ -1280,6 +1281,8 @@ pgstat_get_io_op_index(IOOp io_op)
 			return IO_COL_EXTENDS;
 		case IOOP_FSYNC:
 			return IO_COL_FSYNCS;
+		case IOOP_HIT:
+			return IO_COL_HITS;
 		case IOOP_READ:
 			return IO_COL_READS;
 		case IOOP_REUSE:
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 505595620e..615139c2f9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5721,9 +5721,9 @@
   proname => 'pg_stat_get_io', provolatile => 'v',
   prorows => '30', proretset => 't',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
+  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
   prosrc => 'pg_stat_get_io' },
 
 { oid => '1136', descr => 'statistics: information about WAL activity',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ed..a7f77c17b3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -304,6 +304,7 @@ typedef enum IOOp
 	IOOP_EVICT,
 	IOOP_EXTEND,
 	IOOP_FSYNC,
+	IOOP_HIT,
 	IOOP_READ,
 	IOOP_REUSE,
 	IOOP_WRITE,
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0b44814740..2afb9bb309 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
 												ForkNumber forkNum,
 												BlockNumber blockNum);
 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
-									BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
+									BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
 									 ForkNumber forkNum,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e953d1f515..c994da9f97 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1883,11 +1883,12 @@ pg_stat_io| SELECT backend_type,
     writes,
     extends,
     op_bytes,
+    hits,
     evictions,
     reuses,
     fsyncs,
     stats_reset
-   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
+   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
     d.datname,
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 186c296299..9dc66ef027 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
 -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@@ -1208,6 +1209,28 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
  t
 (1 row)
 
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT COUNT(*) FROM test_io_shared;
+ count 
+-------
+   100
+(1 row)
+
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+ ?column? 
+----------
+ t
+(1 row)
+
 DROP TABLE test_io_shared;
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
 -- - eviction of local buffers in order to reuse them
@@ -1342,7 +1365,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
  t
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
  pg_stat_reset_shared 
@@ -1350,7 +1373,7 @@ SELECT pg_stat_reset_shared('io');
  
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
  ?column? 
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index d7f873cfc9..b7f0aafe4f 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
@@ -588,6 +589,15 @@ SELECT pg_stat_force_next_flush();
 SELECT sum(reads) AS io_sum_shared_after_reads
   FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation'  \gset
 SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
+
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT COUNT(*) FROM test_io_shared;
+SELECT pg_stat_force_next_flush();
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+
 DROP TABLE test_io_shared;
 
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
@@ -675,10 +685,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
 
 -- Test IO stats reset
 SELECT pg_stat_have_stats('io', 0, 0);
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
 
-- 
2.37.2

