From 094bc124baea085a721f0013da290f3b92f96607 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Mon, 6 Jan 2025 10:00:00 +0000
Subject: [PATCH v5 4/4] per backend WAL statistics

Now that commit 9aea73fc61 added backend-level statistics to pgstats (and
per backend IO statistics) we can more easily add per backend statistics.

This commit adds per backend WAL statistics using the same layer as pg_stat_wal,
except that it is now possible to know how much WAL activity is happening in each
backend rather than an overall aggregate of all the activity.  A function called
pg_stat_get_backend_wal() is added to access this data depending on the
PID of a backend.

Note that allow_critical_section has been enabled for the PGSTAT_KIND_BACKEND
stats kind as we are now tracking variable-numbered stats in critical section(s)
(XLogWrite() for example).

The same limitation as in 9aea73fc61 persists, meaning that Auxiliary processes
are not included in this set of statistics.

XXX: bump catalog version
---
 doc/src/sgml/config.sgml                    |  4 +-
 doc/src/sgml/monitoring.sgml                | 19 +++++++
 src/backend/access/transam/xlog.c           | 44 +++++++++++++--
 src/backend/utils/activity/pgstat.c         |  2 +-
 src/backend/utils/activity/pgstat_backend.c | 59 +++++++++++++++++++++
 src/backend/utils/activity/pgstat_wal.c     |  2 +
 src/backend/utils/adt/pgstatfuncs.c         | 53 +++++++++++++++++-
 src/include/catalog/pg_proc.dat             |  7 +++
 src/include/pgstat.h                        | 43 ++++++++-------
 src/include/utils/pgstat_internal.h         |  3 +-
 src/test/regress/expected/stats.out         | 14 +++++
 src/test/regress/sql/stats.sql              |  6 +++
 12 files changed, 227 insertions(+), 29 deletions(-)
  14.2% doc/src/sgml/
  14.9% src/backend/access/transam/
  28.8% src/backend/utils/activity/
  19.7% src/backend/utils/adt/
   7.0% src/include/catalog/
   5.1% src/include/
   5.6% src/test/regress/expected/
   4.3% src/test/regress/sql/

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3f41a17b1fe..075cb4fe1a8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8292,7 +8292,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         measure the overhead of timing on your system.
         I/O timing information is
         displayed in <link linkend="monitoring-pg-stat-wal-view">
-        <structname>pg_stat_wal</structname></link>.
+        <structname>pg_stat_wal</structname></link> and in the output of the
+        <link linkend="pg-stat-get-backend-wal">
+        <function>pg_stat_get_backend_wal()</function></link> function.
         Only superusers and users with the appropriate <literal>SET</literal>
         privilege can change this setting.
        </para>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index e5888fae2b5..8fdf7d13f21 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -4824,6 +4824,25 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        </para></entry>
       </row>
 
+      <row>
+       <entry id="pg-stat-get-backend-wal" role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_stat_get_backend_wal</primary>
+        </indexterm>
+        <function>pg_stat_get_backend_wal</function> ( <type>integer</type> )
+        <returnvalue>record</returnvalue>
+       </para>
+       <para>
+        Returns WAL statistics about the backend with the specified
+        process ID. The output fields are exactly the same as the ones in the
+        <structname>pg_stat_wal</structname> view.
+       </para>
+       <para>
+        The function does not return WAL statistics for the checkpointer,
+        the background writer, the startup process and the autovacuum launcher.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bf3dbda901d..d09a079bcbb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -96,6 +96,7 @@
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
 #include "utils/injection_point.h"
+#include "utils/pgstat_internal.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
 #include "utils/snapmgr.h"
@@ -2058,6 +2059,15 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 					XLogWrite(WriteRqst, tli, false);
 					LWLockRelease(WALWriteLock);
 					PendingWalStats.wal_buffers_full++;
+
+					if (pgstat_tracks_backend_bktype(MyBackendType))
+					{
+						PgStat_BackendPending *entry_ref;
+
+						entry_ref = pgstat_prep_backend_pending(MyProcNumber);
+						entry_ref->pending_wal.wal_buffers_full++;
+					}
+
 					TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 				}
 				/* Re-acquire WALBufMappingLock and retry */
@@ -2426,11 +2436,14 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 			Size		nleft;
 			ssize_t		written;
 			instr_time	start;
+			instr_time	end;
 
 			/* OK to write the page(s) */
 			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
 			nbytes = npages * (Size) XLOG_BLCKSZ;
 			nleft = nbytes;
+			/* keep compiler quiet */
+			INSTR_TIME_SET_ZERO(end);
 			do
 			{
 				errno = 0;
@@ -2451,14 +2464,24 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				 */
 				if (track_wal_io_timing)
 				{
-					instr_time	end;
-
 					INSTR_TIME_SET_CURRENT(end);
 					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start);
 				}
 
 				PendingWalStats.wal_write++;
 
+				if (pgstat_tracks_backend_bktype(MyBackendType))
+				{
+					PgStat_BackendPending *entry_ref;
+
+					entry_ref = pgstat_prep_backend_pending(MyProcNumber);
+					entry_ref->pending_wal.wal_write++;
+
+					if (track_wal_io_timing)
+						INSTR_TIME_ACCUM_DIFF(entry_ref->pending_wal.wal_write_time,
+											  end, start);
+				}
+
 				if (written <= 0)
 				{
 					char		xlogfname[MAXFNAMELEN];
@@ -8684,8 +8707,11 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 {
 	char	   *msg = NULL;
 	instr_time	start;
+	instr_time	end;
 
 	Assert(tli != 0);
+	/* keep compiler quiet */
+	INSTR_TIME_SET_ZERO(end);
 
 	/*
 	 * Quick exit if fsync is disabled or write() has already synced the WAL
@@ -8751,13 +8777,23 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	 */
 	if (track_wal_io_timing)
 	{
-		instr_time	end;
-
 		INSTR_TIME_SET_CURRENT(end);
 		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
 	}
 
 	PendingWalStats.wal_sync++;
+
+	if (pgstat_tracks_backend_bktype(MyBackendType))
+	{
+		PgStat_BackendPending *entry_ref;
+
+		entry_ref = pgstat_prep_backend_pending(MyProcNumber);
+		entry_ref->pending_wal.wal_sync++;
+
+		if (track_wal_io_timing)
+			INSTR_TIME_ACCUM_DIFF(entry_ref->pending_wal.wal_sync_time,
+								  end, start);
+	}
 }
 
 /*
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index a942e04bb97..65b2f687816 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -369,7 +369,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = false,
-		.allow_critical_section = false,
+		.allow_critical_section = true,
 
 		.accessed_across_databases = true,
 
diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index 79e4d0a3053..94762b12f8d 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -24,6 +24,14 @@
 
 #include "utils/pgstat_internal.h"
 
+/*
+ * WAL usage counters saved from pgWalUsage at the previous call to
+ * pgstat_report_wal(). This is used to calculate how much WAL usage
+ * happens between pgstat_report_wal() calls, by subtracting
+ * the previous counters from the current ones.
+ */
+static WalUsage prevBackendWalUsage;
+
 /*
  * Returns statistics of a backend by proc number.
  */
@@ -77,6 +85,52 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref)
 	}
 }
 
+/*
+ * Flush out locally pending backend WAL statistics.  Locking is managed
+ * by the caller.
+ */
+static void
+pgstat_flush_backend_entry_wal(PgStat_EntryRef *entry_ref)
+{
+	PgStatShared_Backend *shbackendent;
+	PgStat_BackendPending *pendingent;
+	PgStat_WalCounters *bktype_shstats;
+	PgStat_PendingWalStats pending_wal;
+	WalUsage	wal_usage_diff = {0};
+
+	shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats;
+	pendingent = (PgStat_BackendPending *) entry_ref->pending;
+	bktype_shstats = &shbackendent->stats.wal_stats;
+	pending_wal = pendingent->pending_wal;
+
+	/*
+	 * We don't update the WAL usage portion of the local WalStats elsewhere.
+	 * Calculate how much WAL usage counters were increased by subtracting the
+	 * previous counters from the current ones.
+	 */
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevBackendWalUsage);
+
+#define WALSTAT_ACC(fld, var_to_add) \
+	(bktype_shstats->fld += var_to_add.fld)
+#define WALSTAT_ACC_INSTR_TIME(fld) \
+	(bktype_shstats->fld += INSTR_TIME_GET_MICROSEC(pending_wal.fld))
+	WALSTAT_ACC(wal_buffers_full, pending_wal);
+	WALSTAT_ACC(wal_write, pending_wal);
+	WALSTAT_ACC(wal_sync, pending_wal);
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC_INSTR_TIME(wal_write_time);
+	WALSTAT_ACC_INSTR_TIME(wal_sync_time);
+#undef WALSTAT_ACC_INSTR_TIME
+#undef WALSTAT_ACC
+
+	/*
+	 * Save the current counters for the subsequent calculation of WAL usage.
+	 */
+	prevBackendWalUsage = pgWalUsage;
+}
+
 /*
  * Wrapper routine to flush backend statistics.
  */
@@ -94,6 +148,9 @@ pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait,
 	if (flags & PGSTAT_BACKEND_FLUSH_IO)
 		pgstat_flush_backend_entry_io(entry_ref);
 
+	if (flags & PGSTAT_BACKEND_FLUSH_WAL)
+		pgstat_flush_backend_entry_wal(entry_ref);
+
 	pgstat_unlock_entry(entry_ref);
 
 	return true;
@@ -147,6 +204,8 @@ pgstat_create_backend(ProcNumber procnum)
 	 * e.g. if we previously used this proc number.
 	 */
 	memset(&shstatent->stats, 0, sizeof(shstatent->stats));
+
+	prevBackendWalUsage = pgWalUsage;
 }
 
 /*
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index bfc06178a68..33627642261 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -55,6 +55,8 @@ pgstat_report_wal(bool force)
 	/* flush wal stats */
 	pgstat_flush_wal(nowait);
 
+	pgstat_flush_backend(nowait, PGSTAT_BACKEND_FLUSH_WAL);
+
 	/* flush IO stats */
 	pgstat_flush_io(nowait);
 }
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 97510d48eef..52bfda4f923 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1606,8 +1606,8 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS)
 /*
  * pg_stat_wal_build_tuple
  *
- * Helper routine for pg_stat_get_wal() returning one tuple based on the contents
- * of wal_counters.
+ * Helper routine for pg_stat_get_wal() and pg_stat_get_backend_wal () returning
+ * one tuple based on the contents of wal_counters.
  */
 static Datum
 pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters,
@@ -1670,6 +1670,55 @@ pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters,
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
 }
 
+/*
+ * Returns WAL statistics for a backend with given PID.
+ */
+Datum
+pg_stat_get_backend_wal(PG_FUNCTION_ARGS)
+{
+	int			pid;
+	PGPROC	   *proc;
+	ProcNumber	procNumber;
+	PgStat_Backend *backend_stats;
+	PgStat_WalCounters bktype_stats;
+	PgBackendStatus *beentry;
+
+	pid = PG_GETARG_INT32(0);
+	proc = BackendPidGetProc(pid);
+
+	/*
+	 * This could be an auxiliary process but these do not report backend
+	 * statistics due to pgstat_tracks_backend_bktype(), so there is no need
+	 * for an extra call to AuxiliaryPidGetProc().
+	 */
+	if (!proc)
+		PG_RETURN_NULL();
+
+	procNumber = GetNumberFromPGProc(proc);
+
+	beentry = pgstat_get_beentry_by_proc_number(procNumber);
+	if (!beentry)
+		PG_RETURN_NULL();
+
+	backend_stats = pgstat_fetch_stat_backend(procNumber);
+	if (!backend_stats)
+		PG_RETURN_NULL();
+
+	/* if PID does not match, leave */
+	if (beentry->st_procpid != pid)
+		PG_RETURN_NULL();
+
+	/* backend may be gone, so recheck in case */
+	if (beentry->st_backendType == B_INVALID)
+		PG_RETURN_NULL();
+
+	bktype_stats = backend_stats->wal_stats;
+
+	/* save tuples with data from this PgStat_BktypeIO */
+	return (pg_stat_wal_build_tuple(bktype_stats, backend_stats->stat_reset_timestamp));
+}
+
+
 /*
  * Returns statistics of WAL activity
  */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index ba02ba53b29..9d57c69c152 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5929,6 +5929,13 @@
   proargmodes => '{o,o,o,o,o,o,o,o,o}',
   proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}',
   prosrc => 'pg_stat_get_wal' },
+{ oid => '8037', descr => 'statistics: backend WAL activity',
+  proname => 'pg_stat_get_backend_wal', provolatile => 'v',
+  proparallel => 'r', prorettype => 'record', proargtypes => 'int4',
+  proallargtypes => '{int4,int8,int8,numeric,int8,int8,int8,float8,float8,timestamptz}',
+  proargmodes => '{i,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_pid,wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}',
+  prosrc => 'pg_stat_get_backend_wal' },
 { oid => '6248', descr => 'statistics: information about WAL prefetching',
   proname => 'pg_stat_get_recovery_prefetch', prorows => '1', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index bb8e0044a47..821104047a0 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -334,24 +334,6 @@ typedef struct PgStat_IO
 	PgStat_BktypeIO stats[BACKEND_NUM_TYPES];
 } PgStat_IO;
 
-typedef struct PgStat_Backend
-{
-	TimestampTz stat_reset_timestamp;
-	PgStat_BktypeIO io_stats;
-} PgStat_Backend;
-
-/* ---------
- * PgStat_BackendPending	Non-flushed backend stats.
- * ---------
- */
-typedef struct PgStat_BackendPending
-{
-	/*
-	 * Backend statistics store the same amount of IO data as PGSTAT_KIND_IO.
-	 */
-	PgStat_PendingIO pending_io;
-} PgStat_BackendPending;
-
 typedef struct PgStat_StatDBEntry
 {
 	PgStat_Counter xact_commit;
@@ -496,6 +478,29 @@ typedef struct PgStat_PendingWalStats
 	instr_time	wal_sync_time;
 } PgStat_PendingWalStats;
 
+/* ---------
+ * PgStat_BackendPending	Non-flushed backend stats.
+ * ---------
+ */
+typedef struct PgStat_BackendPending
+{
+	/*
+	 * Backend statistics store the same amount of IO data as PGSTAT_KIND_IO.
+	 */
+	PgStat_PendingIO pending_io;
+
+	/*
+	 * Backend statistics store the same amount of IO data as PGSTAT_KIND_WAL.
+	 */
+	PgStat_PendingWalStats pending_wal;
+} PgStat_BackendPending;
+
+typedef struct PgStat_Backend
+{
+	TimestampTz stat_reset_timestamp;
+	PgStat_BktypeIO io_stats;
+	PgStat_WalCounters wal_stats;
+} PgStat_Backend;
 
 /*
  * Functions in pgstat.c
@@ -826,6 +831,4 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
 
 /* updated directly by backends and background processes */
 extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
-
-
 #endif							/* PGSTAT_H */
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 0b21603c863..5a284d8c94a 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -618,7 +618,8 @@ extern void pgstat_archiver_snapshot_cb(void);
 
 /* flags for pgstat_flush_backend() */
 #define PGSTAT_BACKEND_FLUSH_IO		(1 << 0)	/* Flush I/O statistics */
-#define PGSTAT_BACKEND_FLUSH_ALL	(PGSTAT_BACKEND_FLUSH_IO)
+#define PGSTAT_BACKEND_FLUSH_WAL	(1 << 1)	/* Flush WAL statistics */
+#define PGSTAT_BACKEND_FLUSH_ALL	(PGSTAT_BACKEND_FLUSH_IO | PGSTAT_BACKEND_FLUSH_WAL)
 
 extern void pgstat_flush_backend(bool nowait, bits32 flags);
 extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum);
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index a0317b7208e..cc01fdf2741 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -832,6 +832,8 @@ SELECT sessions > :db_stat_sessions FROM pg_stat_database WHERE datname = (SELEC
 SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset
 -- Test pg_stat_wal (and make a temp table so our temp schema exists)
 SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset
+-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists)
+SELECT wal_bytes AS backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset
 CREATE TEMP TABLE test_stats_temp AS SELECT 17;
 DROP TABLE test_stats_temp;
 -- Checkpoint twice: The checkpointer reports stats after reporting completion
@@ -851,6 +853,18 @@ SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal;
  t
 (1 row)
 
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
+SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid());
+ ?column? 
+----------
+ t
+(1 row)
+
 -- Test pg_stat_get_backend_idset() and some allied functions.
 -- In particular, verify that their notion of backend ID matches
 -- our temp schema index.
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 399c72bbcf7..28fe0a1a7d0 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -423,6 +423,9 @@ SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset
 -- Test pg_stat_wal (and make a temp table so our temp schema exists)
 SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset
 
+-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists)
+SELECT wal_bytes AS backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset
+
 CREATE TEMP TABLE test_stats_temp AS SELECT 17;
 DROP TABLE test_stats_temp;
 
@@ -435,6 +438,9 @@ CHECKPOINT;
 SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer;
 SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal;
 
+SELECT pg_stat_force_next_flush();
+SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid());
+
 -- Test pg_stat_get_backend_idset() and some allied functions.
 -- In particular, verify that their notion of backend ID matches
 -- our temp schema index.
-- 
2.34.1

