From 99561e65c3e266157ac21588097c8222a1df1d28 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 20 Jan 2025 15:31:57 +0900
Subject: [PATCH v2 2/2] More work to allow backend statistics in critical
 sections

Callbacks are refactored so as variable-numbered stats can use them if
they don't want to use pending data, like the backend stats.
---
 src/include/pgstat.h                         |  21 ++-
 src/include/utils/pgstat_internal.h          |  29 ++--
 src/backend/utils/activity/pgstat.c          |  67 +++------
 src/backend/utils/activity/pgstat_backend.c  | 142 ++++++++++++-------
 src/backend/utils/activity/pgstat_io.c       |  15 +-
 src/backend/utils/activity/pgstat_relation.c |   4 +-
 6 files changed, 140 insertions(+), 138 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3eb7b6614f..d0d4515097 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -540,6 +540,15 @@ extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
  * Functions in pgstat_backend.c
  */
 
+/* used by pgstat_io.c for I/O stats tracked in backends */
+extern void pgstat_count_backend_io_op_time(IOObject io_object,
+											IOContext io_context,
+											IOOp io_op,
+											instr_time io_time);
+extern void pgstat_count_backend_io_op(IOObject io_object,
+									   IOContext io_context,
+									   IOOp io_op, uint32 cnt,
+									   uint64 bytes);
 extern PgStat_Backend *pgstat_fetch_stat_backend(ProcNumber procNumber);
 extern bool pgstat_tracks_backend_bktype(BackendType bktype);
 extern void pgstat_create_backend(ProcNumber procnum);
@@ -826,17 +835,5 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
 /* updated directly by backends and background processes */
 extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
 
-/*
- * Variables in pgstat_backend.c
- */
-
-/* updated directly by backends */
-
-/*
- * Some pending statistics are not well suited to memory allocation and have
- * to be outside of critical sections. For those, let's rely on this variable
- * instead.
- */
-extern PGDLLIMPORT PgStat_BackendPending PendingBackendStats;
 
 #endif							/* PGSTAT_H */
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 4bb8e5c53a..000ed5b36f 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -259,8 +259,8 @@ typedef struct PgStat_KindInfo
 	void		(*init_backend_cb) (void);
 
 	/*
-	 * For variable-numbered stats: flush pending stats. Required if pending
-	 * data is used.  See flush_fixed_cb for fixed-numbered stats.
+	 * For variable-numbered stats: flush pending stats within the dshash.
+	 * Required if pending data interacts with the pgstats dshash.
 	 */
 	bool		(*flush_pending_cb) (PgStat_EntryRef *sr, bool nowait);
 
@@ -289,17 +289,19 @@ typedef struct PgStat_KindInfo
 	void		(*init_shmem_cb) (void *stats);
 
 	/*
-	 * For fixed-numbered statistics: Flush pending stats. Returns true if
-	 * some of the stats could not be flushed, due to lock contention for
-	 * example. Optional.
+	 * For fixed-numbered or variable-numbered statistics: Check for pending
+	 * stats in need of flush, when these do not use the pgstats dshash.
+	 * Returns true if there are any stats pending for flush, triggering
+	 * flush_cb. Optional.
 	 */
-	bool		(*flush_fixed_cb) (bool nowait);
+	bool		(*have_pending_cb) (void);
 
 	/*
-	 * For fixed-numbered statistics: Check for pending stats in need of
-	 * flush. Returns true if there are any stats pending for flush. Optional.
+	 * For fixed-numbered or variable-numbered statistics: Flush pending
+	 * stats. Returns true if some of the stats could not be flushed, due
+	 * to lock contention for example. Optional.
 	 */
-	bool		(*have_fixed_pending_cb) (void);
+	bool		(*flush_cb) (bool nowait);
 
 	/*
 	 * For fixed-numbered statistics: Reset All.
@@ -617,10 +619,11 @@ extern void pgstat_archiver_snapshot_cb(void);
 #define PGSTAT_BACKEND_FLUSH_IO		(1 << 0)	/* Flush I/O statistics */
 #define PGSTAT_BACKEND_FLUSH_ALL	(PGSTAT_BACKEND_FLUSH_IO)
 
-extern void pgstat_flush_backend(bool nowait, bits32 flags);
-extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum);
-extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
-extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
+extern bool pgstat_flush_backend(bool nowait, bits32 flags);
+extern bool pgstat_backend_flush_cb(bool nowait);
+extern bool pgstat_backend_have_pending_cb(void);
+extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header,
+											  TimestampTz ts);
 
 /*
  * Functions in pgstat_bgwriter.c
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 27aa049100..bd1ffad6f4 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -372,7 +372,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
 		.pending_size = sizeof(PgStat_BackendPending),
 
-		.flush_pending_cb = pgstat_backend_flush_cb,
+		.have_pending_cb = pgstat_backend_have_pending_cb,
+		.flush_cb = pgstat_backend_flush_cb,
 		.reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
 	},
 
@@ -437,8 +438,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.shared_data_off = offsetof(PgStatShared_IO, stats),
 		.shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
 
-		.flush_fixed_cb = pgstat_io_flush_cb,
-		.have_fixed_pending_cb = pgstat_io_have_pending_cb,
+		.flush_cb = pgstat_io_flush_cb,
+		.have_pending_cb = pgstat_io_have_pending_cb,
 		.init_shmem_cb = pgstat_io_init_shmem_cb,
 		.reset_all_cb = pgstat_io_reset_all_cb,
 		.snapshot_cb = pgstat_io_snapshot_cb,
@@ -455,8 +456,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.shared_data_off = offsetof(PgStatShared_SLRU, stats),
 		.shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
 
-		.flush_fixed_cb = pgstat_slru_flush_cb,
-		.have_fixed_pending_cb = pgstat_slru_have_pending_cb,
+		.flush_cb = pgstat_slru_flush_cb,
+		.have_pending_cb = pgstat_slru_have_pending_cb,
 		.init_shmem_cb = pgstat_slru_init_shmem_cb,
 		.reset_all_cb = pgstat_slru_reset_all_cb,
 		.snapshot_cb = pgstat_slru_snapshot_cb,
@@ -474,8 +475,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats),
 
 		.init_backend_cb = pgstat_wal_init_backend_cb,
-		.flush_fixed_cb = pgstat_wal_flush_cb,
-		.have_fixed_pending_cb = pgstat_wal_have_pending_cb,
+		.flush_cb = pgstat_wal_flush_cb,
+		.have_pending_cb = pgstat_wal_have_pending_cb,
 		.init_shmem_cb = pgstat_wal_init_shmem_cb,
 		.reset_all_cb = pgstat_wal_reset_all_cb,
 		.snapshot_cb = pgstat_wal_snapshot_cb,
@@ -713,22 +714,17 @@ pgstat_report_stat(bool force)
 	{
 		bool		do_flush = false;
 
-		/* Check for pending fixed-numbered stats */
+		/* Check for pending stats */
 		for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
 		{
 			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
 
 			if (!kind_info)
 				continue;
-			if (!kind_info->fixed_amount)
-			{
-				Assert(kind_info->have_fixed_pending_cb == NULL);
-				continue;
-			}
-			if (!kind_info->have_fixed_pending_cb)
+			if (!kind_info->have_pending_cb)
 				continue;
 
-			if (kind_info->have_fixed_pending_cb())
+			if (kind_info->have_pending_cb())
 			{
 				do_flush = true;
 				break;
@@ -792,22 +788,20 @@ pgstat_report_stat(bool force)
 	/* flush of variable-numbered stats */
 	partial_flush |= pgstat_flush_pending_entries(nowait);
 
-	/* flush of fixed-numbered stats */
+	/*
+	 * Flush of other stats, which could be variable-numbered or
+	 * fixed-numbered.
+	 */
 	for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
 	{
 		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
 
 		if (!kind_info)
 			continue;
-		if (!kind_info->fixed_amount)
-		{
-			Assert(kind_info->flush_fixed_cb == NULL);
-			continue;
-		}
-		if (!kind_info->flush_fixed_cb)
+		if (!kind_info->flush_cb)
 			continue;
 
-		partial_flush |= kind_info->flush_fixed_cb(nowait);
+		partial_flush |= kind_info->flush_cb(nowait);
 	}
 
 	last_flush = now;
@@ -1369,7 +1363,6 @@ static bool
 pgstat_flush_pending_entries(bool nowait)
 {
 	bool		have_pending = false;
-	bool		backend_have_pending = false;
 	dlist_node *cur = NULL;
 
 	/*
@@ -1417,33 +1410,9 @@ pgstat_flush_pending_entries(bool nowait)
 		cur = next;
 	}
 
-	/*
-	 * There is a special case for some pending stats that are tracked in
-	 * PendingBackendStats. It's possible that those have not been flushed
-	 * above, hence the extra check here.
-	 */
-	if (!pg_memory_is_all_zeros(&PendingBackendStats,
-								sizeof(struct PgStat_BackendPending)))
-	{
-		PgStat_EntryRef *entry_ref;
-
-		entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid,
-										 MyProcNumber, false, NULL);
-
-		if (entry_ref)
-		{
-			PgStat_HashKey key = entry_ref->shared_entry->key;
-			PgStat_Kind kind = key.kind;
-			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
-
-			if (!kind_info->flush_pending_cb(entry_ref, nowait))
-				backend_have_pending = true;
-		}
-	}
-
 	Assert(dlist_is_empty(&pgStatPending) == !have_pending);
 
-	return (have_pending || backend_have_pending);
+	return have_pending;
 }
 
 
diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index ca5cec348b..fb7abf64a0 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -11,7 +11,9 @@
  * This statistics kind uses a proc number as object ID for the hash table
  * of pgstats.  Entries are created each time a process is spawned, and are
  * dropped when the process exits.  These are not written to the pgstats file
- * on disk.
+ * on disk.  Pending statistics are managed without direct interactions with
+ * the pgstats dshash, relying on PendingBackendStats instead so as it is
+ * possible to report data within critical sections.
  *
  * Copyright (c) 2001-2025, PostgreSQL Global Development Group
  *
@@ -22,10 +24,53 @@
 
 #include "postgres.h"
 
+#include "storage/bufmgr.h"
 #include "utils/memutils.h"
 #include "utils/pgstat_internal.h"
 
-PgStat_BackendPending PendingBackendStats = {0};
+/*
+ * Backend statistics counts waiting to be flushed out.  We assume this variable
+ * inits to zeroes.  These counters may be reported within critical sections so
+ * we use static memory in order to avoid memory allocation.
+ */
+static PgStat_BackendPending PendingBackendStats = {0};
+static bool have_backendstats = false;
+
+/*
+ * Utility routines to report I/O stats for backends, kept here to avoid
+ * exposing PendingBackendStats to the outside world.
+ */
+void
+pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context,
+								IOOp io_op, instr_time io_time)
+{
+	Assert(track_io_timing);
+
+	if (!pgstat_tracks_backend_bktype(MyBackendType))
+		return;
+
+	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
+
+	INSTR_TIME_ADD(PendingBackendStats.pending_io.pending_times[io_object][io_context][io_op],
+				   io_time);
+
+	have_backendstats = true;
+}
+
+void
+pgstat_count_backend_io_op(IOObject io_object, IOContext io_context,
+						   IOOp io_op, uint32 cnt, uint64 bytes)
+{
+	if (!pgstat_tracks_backend_bktype(MyBackendType))
+		return;
+
+	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
+
+	PendingBackendStats.pending_io.counts[io_object][io_context][io_op] += cnt;
+	PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes;
+
+	have_backendstats = true;
+}
 
 /*
  * Returns statistics of a backend by proc number.
@@ -53,8 +98,9 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref)
 	PgStat_PendingIO pending_io;
 
 	/*
-	 * This function can be called even if nothing at all has happened. In
-	 * this case, avoid unnecessarily modifying the stats entry.
+	 * This function can be called even if nothing at all has happened for
+	 * IO statistics.  In this case, avoid unnecessarily modifying the stats
+	 * entry.
 	 */
 	if (pg_memory_is_all_zeros(&PendingBackendStats.pending_io,
 							   sizeof(struct PgStat_PendingIO)))
@@ -83,25 +129,29 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref)
 			}
 		}
 	}
-
-	/*
-	 * Clear out the statistics buffer, so it can be re-used.
-	 */
-	MemSet(&PendingBackendStats.pending_io, 0, sizeof(PgStat_PendingIO));
 }
 
 /*
- * Wrapper routine to flush backend statistics.
+ * Flush out locally pending backend statistics
+ *
+ * "flags" parameter controls which statistics to flush.  Returns true
+ * if some statistics could not be flushed.
  */
-static bool
-pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait,
-						   bits32 flags)
+bool
+pgstat_flush_backend(bool nowait, bits32 flags)
 {
+	PgStat_EntryRef *entry_ref;
+
+	if (!have_backendstats)
+		return false;
+
 	if (!pgstat_tracks_backend_bktype(MyBackendType))
 		return false;
 
-	if (!pgstat_lock_entry(entry_ref, nowait))
-		return false;
+	entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid,
+											MyProcNumber, nowait);
+	if (!entry_ref)
+		return true;
 
 	/* Flush requested statistics */
 	if (flags & PGSTAT_BACKEND_FLUSH_IO)
@@ -109,36 +159,33 @@ pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait,
 
 	pgstat_unlock_entry(entry_ref);
 
-	return true;
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&PendingBackendStats, 0, sizeof(PgStat_BackendPending));
+
+	have_backendstats = false;
+	return false;
+}
+
+/*
+ * Check if there are any backend stats waiting for flush.
+ */
+bool
+pgstat_backend_have_pending_cb(void)
+{
+	return have_backendstats;
 }
 
 /*
  * Callback to flush out locally pending backend statistics.
  *
- * If no stats have been recorded, this function returns false.
+ * If some stats could not be flushed, return true.
  */
 bool
-pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
+pgstat_backend_flush_cb(bool nowait)
 {
-	return pgstat_flush_backend_entry(entry_ref, nowait, PGSTAT_BACKEND_FLUSH_ALL);
-}
-
-/*
- * Flush out locally pending backend statistics
- *
- * "flags" parameter controls which statistics to flush.
- */
-void
-pgstat_flush_backend(bool nowait, bits32 flags)
-{
-	PgStat_EntryRef *entry_ref;
-
-	if (!pgstat_tracks_backend_bktype(MyBackendType))
-		return;
-
-	entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid,
-									 MyProcNumber, false, NULL);
-	(void) pgstat_flush_backend_entry(entry_ref, nowait, flags);
+	return pgstat_flush_backend(nowait, PGSTAT_BACKEND_FLUSH_ALL);
 }
 
 /*
@@ -150,9 +197,8 @@ pgstat_create_backend(ProcNumber procnum)
 	PgStat_EntryRef *entry_ref;
 	PgStatShared_Backend *shstatent;
 
-	entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid,
-										  procnum, NULL);
-
+	entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid,
+											MyProcNumber, false);
 	shstatent = (PgStatShared_Backend *) entry_ref->shared_stats;
 
 	/*
@@ -160,20 +206,10 @@ pgstat_create_backend(ProcNumber procnum)
 	 * e.g. if we previously used this proc number.
 	 */
 	memset(&shstatent->stats, 0, sizeof(shstatent->stats));
-}
+	pgstat_unlock_entry(entry_ref);
 
-/*
- * Find or create a local PgStat_BackendPending entry for proc number.
- */
-PgStat_BackendPending *
-pgstat_prep_backend_pending(ProcNumber procnum)
-{
-	PgStat_EntryRef *entry_ref;
-
-	entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid,
-										  procnum, NULL);
-
-	return entry_ref->pending;
+	MemSet(&PendingBackendStats, 0, sizeof(PgStat_BackendPending));
+	have_backendstats = false;
 }
 
 /*
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index b8f19515ae..6ff5d9e96a 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -73,15 +73,12 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
 	Assert(pgstat_is_ioop_tracked_in_bytes(io_op) || bytes == 0);
 	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
 
-	if (pgstat_tracks_backend_bktype(MyBackendType))
-	{
-		PendingBackendStats.pending_io.counts[io_object][io_context][io_op] += cnt;
-		PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes;
-	}
-
 	PendingIOStats.counts[io_object][io_context][io_op] += cnt;
 	PendingIOStats.bytes[io_object][io_context][io_op] += bytes;
 
+	/* Add the per-backend counts */
+	pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes);
+
 	have_iostats = true;
 }
 
@@ -142,9 +139,9 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 		INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
 					   io_time);
 
-		if (pgstat_tracks_backend_bktype(MyBackendType))
-			INSTR_TIME_ADD(PendingBackendStats.pending_io.pending_times[io_object][io_context][io_op],
-						   io_time);
+		/* Add the per-backend count */
+		pgstat_count_backend_io_op_time(io_object, io_context, io_op,
+										io_time);
 	}
 
 	pgstat_count_io_op(io_object, io_context, io_op, cnt, bytes);
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index 09247ba097..965a7fe2c6 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared,
 	 * VACUUM command has processed all tables and committed.
 	 */
 	pgstat_flush_io(false);
-	pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
+	(void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
 }
 
 /*
@@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel,
 
 	/* see pgstat_report_vacuum() */
 	pgstat_flush_io(false);
-	pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
+	(void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
 }
 
 /*
-- 
2.47.1

