From 47a36e4a17817e56c6bebd0e2e53c338b35d267b Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 12:07:37 -0500
Subject: [PATCH 6/8] Add system view tracking IO ops per backend type

Add pg_stat_buffers, a system view which tracks the number of IO
operations (allocs, writes, fsyncs, and extends) done through each IO
path (e.g. shared buffers, local buffers, unbuffered IO) by each type of
backend.

Some of these should always be zero. For example, checkpointer does not
use a BufferAccessStrategy (currently), so the "strategy" IO Path for
checkpointer will be 0 for all IO operations (alloc, write, fsync, and
extend). All possible combinations of IOPath and IOOp are enumerated in
the view but not all are populated or even possible at this point.

All backends increment a counter in an array of IO stat counters in
their PgBackendStatus when performing an IO operation. On exit, backends
send these stats to the stats collector to be persisted.

When the pg_stat_buffers view is queried, one backend will sum live
backends' stats with saved stats from exited backends and subtract saved
reset stats, returning the total.

Each row of the view is stats for a particular backend type for a
particular IO Path (e.g. shared buffer accesses by checkpointer) and
each column in the view is the total number of IO operations done (e.g.
writes).
So a cell in the view would be, for example, the number of shared
buffers written by checkpointer since the last stats reset.

Suggested by Andres Freund

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de
---
 doc/src/sgml/monitoring.sgml                | 119 +++++++++++++++-
 src/backend/catalog/system_views.sql        |  11 ++
 src/backend/postmaster/pgstat.c             |  12 ++
 src/backend/utils/activity/backend_status.c |  19 ++-
 src/backend/utils/adt/pgstatfuncs.c         | 150 ++++++++++++++++++++
 src/include/catalog/pg_proc.dat             |   9 ++
 src/include/pgstat.h                        |  11 ++
 src/include/utils/backend_status.h          |   1 +
 src/test/regress/expected/rules.out         |   8 ++
 9 files changed, 332 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index dc07cc15f53..e73783fe116 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -435,6 +435,15 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
      </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_buffers</structname><indexterm><primary>pg_stat_buffers</primary></indexterm></entry>
+      <entry>A row for each IO path for each backend type showing
+      statistics about backend IO operations. See
+       <link linkend="monitoring-pg-stat-buffers-view">
+       <structname>pg_stat_buffers</structname></link> for details.
+     </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_wal</structname><indexterm><primary>pg_stat_wal</primary></indexterm></entry>
       <entry>One row only, showing statistics about WAL activity. See
@@ -3581,7 +3590,102 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
        <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
       </para>
       <para>
-       Time at which these statistics were last reset
+       Time at which these statistics were last reset.
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ </sect2>
+
+ <sect2 id="monitoring-pg-stat-buffers-view">
+  <title><structname>pg_stat_buffers</structname></title>
+
+  <indexterm>
+   <primary>pg_stat_buffers</primary>
+  </indexterm>
+
+  <para>
+   The <structname>pg_stat_buffers</structname> view has a row for each backend
+   type for each possible IO path containing global data for the cluster for
+   that backend and IO path.
+  </para>
+
+  <table id="pg-stat-buffers-view" xreflabel="pg_stat_buffers">
+   <title><structname>pg_stat_buffers</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>backend_type</structfield> <type>text</type>
+      </para>
+      <para>
+       Type of backend (e.g. background worker, autovacuum worker).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>io_path</structfield> <type>text</type>
+      </para>
+      <para>
+       IO path taken (e.g. shared buffers, direct).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>alloc</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers allocated.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>extend</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers extended.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>fsync</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers fsynced.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>write</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers written.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Time at which these statistics were last reset.
       </para></entry>
      </row>
     </tbody>
@@ -5186,12 +5290,13 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
        </para>
        <para>
         Resets some cluster-wide statistics counters to zero, depending on the
-        argument.  The argument can be <literal>buffers</literal> to reset
-        all the counters shown in
-        the <structname>pg_stat_bgwriter</structname>
-        view, <literal>archiver</literal> to reset all the counters shown in
-        the <structname>pg_stat_archiver</structname> view or <literal>wal</literal>
-        to reset all the counters shown in the <structname>pg_stat_wal</structname> view.
+        argument.  The argument can be <literal>archiver</literal> to reset all
+        the counters shown in the <structname>pg_stat_archiver</structname>
+        view, <literal>buffers</literal> to reset all the counters shown in
+        both the <structname>pg_stat_bgwriter</structname> view and
+        <structname>pg_stat_buffers</structname> view, or
+        <literal>wal</literal> to reset all the counters shown in the
+        <structname>pg_stat_wal</structname> view.
        </para>
        <para>
         This function is restricted to superusers by default, but other users
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 9eaa51df290..b6cfe3d3f93 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1103,6 +1103,17 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_buffers AS
+SELECT
+       b.backend_type,
+       b.io_path,
+       b.alloc,
+       b.extend,
+       b.fsync,
+       b.write,
+       b.stats_reset
+FROM pg_stat_get_buffers() b;
+
 CREATE VIEW pg_stat_wal AS
     SELECT
         w.wal_records,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 57a7f0fa7e9..a48998fe944 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -1363,6 +1363,18 @@ pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databas
 	pgstat_send(&msg, sizeof(msg));
 }
 
+/*
+ *     Support function for SQL-callable pgstat* functions. Returns a pointer to
+ *     the PgStat_BackendIOPathOps structure tracking IO operations statistics for
+ *     both exited backends and reset arithmetic.
+ */
+PgStat_BackendIOPathOps *
+pgstat_fetch_exited_backend_buffers(void)
+{
+	backend_read_statsfile();
+	return &globalStats.buffers;
+}
+
 /*
  * Support function for the SQL-callable pgstat* functions. Returns
  * the collected statistics for one database or NULL. NULL doesn't mean
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index 87b9d0fc0d8..c579014ec25 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -38,7 +38,7 @@ int			pgstat_track_activity_query_size = 1024;
 PgBackendStatus *MyBEEntry = NULL;
 
 
-static PgBackendStatus *BackendStatusArray = NULL;
+PgBackendStatus *BackendStatusArray = NULL;
 static char *BackendAppnameBuffer = NULL;
 static char *BackendClientHostnameBuffer = NULL;
 static char *BackendActivityBuffer = NULL;
@@ -241,6 +241,23 @@ CreateSharedBackendStatus(void)
 #endif
 }
 
+const char *
+GetIOPathDesc(IOPath io_path)
+{
+	switch (io_path)
+	{
+		case IOPATH_DIRECT:
+			return "direct";
+		case IOPATH_LOCAL:
+			return "local";
+		case IOPATH_SHARED:
+			return "shared";
+		case IOPATH_STRATEGY:
+			return "strategy";
+	}
+	return "unknown IO path";
+}
+
 /*
  * Initialize pgstats backend activity state, and set up our on-proc-exit
  * hook.  Called from InitPostgres and AuxiliaryProcessMain. For auxiliary
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index ce84525d402..e1213a9ad03 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1739,6 +1739,156 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc);
 }
 
+/*
+* When adding a new column to the pg_stat_buffers view, add a new enum
+* value here above BUFFERS_NUM_COLUMNS.
+*/
+enum
+{
+	BUFFERS_COLUMN_BACKEND_TYPE,
+	BUFFERS_COLUMN_IO_PATH,
+	BUFFERS_COLUMN_ALLOCS,
+	BUFFERS_COLUMN_EXTENDS,
+	BUFFERS_COLUMN_FSYNCS,
+	BUFFERS_COLUMN_WRITES,
+	BUFFERS_COLUMN_RESET_TIME,
+	BUFFERS_NUM_COLUMNS,
+};
+
+/*
+ * Helper function to get the correct row in the pg_stat_buffers view.
+ */
+static inline Datum *
+get_pg_stat_buffers_row(Datum all_values[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][BUFFERS_NUM_COLUMNS],
+		BackendType backend_type, IOPath io_path)
+{
+	return all_values[backend_type_get_idx(backend_type)][io_path];
+}
+
+Datum
+pg_stat_get_buffers(PG_FUNCTION_ARGS)
+{
+	PgStat_BackendIOPathOps *backend_io_path_ops;
+	PgBackendStatus *beentry;
+	Datum		reset_time;
+
+	ReturnSetInfo *rsinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	Datum		all_values[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][BUFFERS_NUM_COLUMNS];
+	bool		all_nulls[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][BUFFERS_NUM_COLUMNS];
+
+	rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+	tupstore = tuplestore_begin_heap((bool) (rsinfo->allowedModes & SFRM_Materialize_Random),
+			false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+	MemoryContextSwitchTo(oldcontext);
+
+	memset(all_values, 0, sizeof(all_values));
+	memset(all_nulls, 0, sizeof(all_nulls));
+
+	 /* Loop through all live backends and count their IO Ops for each IO Path */
+	beentry = BackendStatusArray;
+
+	for (int i = 0; i < GetMaxBackends() + NUM_AUXPROCTYPES; i++, beentry++)
+	{
+		IOOpCounters   *io_ops;
+
+		/*
+		 * Don't count dead backends. They will be added below. There are no
+		 * rows in the view for BackendType B_INVALID, so skip those as well.
+		 */
+		if (beentry->st_procpid == 0 || beentry->st_backendType == B_INVALID)
+			continue;
+
+		io_ops = beentry->io_path_stats;
+
+		for (int i = 0; i < IOPATH_NUM_TYPES; i++)
+		{
+			Datum *row = get_pg_stat_buffers_row(all_values, beentry->st_backendType, i);
+
+			/*
+			 * BUFFERS_COLUMN_RESET_TIME, BUFFERS_COLUMN_BACKEND_TYPE, and
+			 * BUFFERS_COLUMN_IO_PATH will all be set when looping through
+			 * exited backends array
+			 */
+			row[BUFFERS_COLUMN_ALLOCS] += pg_atomic_read_u64(&io_ops->allocs);
+			row[BUFFERS_COLUMN_EXTENDS] += pg_atomic_read_u64(&io_ops->extends);
+			row[BUFFERS_COLUMN_FSYNCS] += pg_atomic_read_u64(&io_ops->fsyncs);
+			row[BUFFERS_COLUMN_WRITES] += pg_atomic_read_u64(&io_ops->writes);
+			io_ops++;
+		}
+	}
+
+	/* Add stats from all exited backends */
+	backend_io_path_ops = pgstat_fetch_exited_backend_buffers();
+
+	reset_time = TimestampTzGetDatum(backend_io_path_ops->stat_reset_timestamp);
+
+	for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+	{
+		BackendType backend_type = idx_get_backend_type(i);
+
+		PgStatIOOpCounters *io_ops =
+			backend_io_path_ops->ops[i].io_path_ops;
+		PgStatIOOpCounters *resets =
+			backend_io_path_ops->resets[i].io_path_ops;
+
+		Datum		backend_type_desc =
+			CStringGetTextDatum(GetBackendTypeDesc(backend_type));
+
+		for (int j = 0; j < IOPATH_NUM_TYPES; j++)
+		{
+			Datum *row = get_pg_stat_buffers_row(all_values, backend_type, j);
+
+			row[BUFFERS_COLUMN_BACKEND_TYPE] = backend_type_desc;
+			row[BUFFERS_COLUMN_IO_PATH] = CStringGetTextDatum(GetIOPathDesc(j));
+			row[BUFFERS_COLUMN_RESET_TIME] = reset_time;
+			row[BUFFERS_COLUMN_ALLOCS] += io_ops->allocs - resets->allocs;
+			row[BUFFERS_COLUMN_EXTENDS] += io_ops->extends - resets->extends;
+			row[BUFFERS_COLUMN_FSYNCS] += io_ops->fsyncs - resets->fsyncs;
+			row[BUFFERS_COLUMN_WRITES] += io_ops->writes - resets->writes;
+			io_ops++;
+			resets++;
+		}
+	}
+
+	for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+	{
+		for (int j = 0; j < IOPATH_NUM_TYPES; j++)
+		{
+			Datum	   *values = all_values[i][j];
+			bool	   *nulls = all_nulls[i][j];
+
+			tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+		}
+	}
+
+	return (Datum) 0;
+}
+
 /*
  * Returns statistics of WAL activity
  */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 25304430f44..2a3f11c26e9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5641,6 +5641,15 @@
   proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
   prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
 
+{ oid => '8459', descr => 'statistics: counts of all IO operations done through all IO paths by each type of backend.',
+  proname => 'pg_stat_get_buffers', provolatile => 's', proisstrict => 'f',
+  prorows => '52', proretset => 't',
+  proparallel => 'r', prorettype => 'record', proargtypes => '',
+  proallargtypes => '{text,text,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_path,alloc,extend,fsync,write,stats_reset}',
+  prosrc => 'pg_stat_get_buffers' },
+
 { oid => '1136', descr => 'statistics: information about WAL activity',
   proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 9e93580f680..ca96f7c3e4c 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1207,6 +1207,17 @@ extern void pgstat_sum_io_path_ops(PgStatIOOpCounters *dest, IOOpCounters *src);
 /*
  * Functions in pgstat_replslot.c
  */
+extern PgStat_BackendIOPathOps *pgstat_fetch_exited_backend_buffers(void);
+extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
+extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
+extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
+extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
+extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
+extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
+extern PgStat_GlobalStats *pgstat_fetch_global(void);
+extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
+extern PgStat_SLRUStats *pgstat_fetch_slru(void);
+extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname);
 
 extern void pgstat_reset_replslot_counter(const char *name);
 extern void pgstat_report_replslot(const PgStat_StatReplSlotEntry *repSlotStat);
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 7e59c063b94..6d623ff7469 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -316,6 +316,7 @@ extern PGDLLIMPORT int pgstat_track_activity_query_size;
  * ----------
  */
 extern PGDLLIMPORT PgBackendStatus *MyBEEntry;
+extern PGDLLIMPORT PgBackendStatus *BackendStatusArray;
 
 
 /* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 423b9b99fb6..39d0ed45642 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1820,6 +1820,14 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints
     pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync,
     pg_stat_get_buf_alloc() AS buffers_alloc,
     pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
+pg_stat_buffers| SELECT b.backend_type,
+    b.io_path,
+    b.alloc,
+    b.extend,
+    b.fsync,
+    b.write,
+    b.stats_reset
+   FROM pg_stat_get_buffers() b(backend_type, io_path, alloc, extend, fsync, write, stats_reset);
 pg_stat_database| SELECT d.oid AS datid,
     d.datname,
         CASE
-- 
2.17.1

