From 380903150cdeb75a7703d6918df7c77f3211c3df Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 11:16:55 -0500
Subject: [PATCH 4/8] Send IO operations to stats collector

On exit, backends send the IO operations they have done on all IO Paths
to the stats collector. The stats collector adds these counts to its
existing counts stored in a global data structure it maintains and
persists.

PgStatIOOpCounters contains the same information as backend_status.h's
IOOpCounters. However, IOOpCounters' members must be atomics, whereas the
stats collector has no such requirement.

Suggested by Andres Freund

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de
---
 src/backend/postmaster/pgstat.c    | 98 +++++++++++++++++++++++++++++-
 src/include/miscadmin.h            |  2 +
 src/include/pgstat.h               | 60 ++++++++++++++++++
 src/include/utils/backend_status.h | 37 +++++++++++
 4 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index ef1cba61a6f..f4c0fd3e8dc 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -146,6 +146,7 @@ static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
 static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len);
+static void pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len);
 static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
 static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
@@ -178,7 +179,6 @@ char	   *pgstat_stat_directory = NULL;
 char	   *pgstat_stat_filename = NULL;
 char	   *pgstat_stat_tmpname = NULL;
 
-
 /* ----------
  * state shared with pgstat_*.c
  * ----------
@@ -704,6 +704,14 @@ pgstat_shutdown_hook(int code, Datum arg)
 {
 	Assert(!pgstat_is_shutdown);
 
+	/*
+	 * Only need to send stats on IOOps for IOPaths when a process exits. Users
+	 * requiring IOOps for both live and exited backends can read from live
+	 * backends' PgBackendStatuses and sum this with totals from exited
+	 * backends persisted by the stats collector.
+	 */
+	pgstat_send_buffers();
+
 	/*
 	 * If we got as far as discovering our own database ID, we can report what
 	 * we did to the collector.  Otherwise, we'd be sending an invalid
@@ -1559,6 +1567,45 @@ pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
 	hdr->m_type = mtype;
 }
 
+/*
+ * Report this backend's IO operation counts to the stats collector. This is
+ * done once, as the backend exits, so that the counts outlive the backend.
+ */
+void
+pgstat_send_buffers(void)
+{
+	PgBackendStatus *beentry = MyBEEntry;
+	PgStat_MsgIOPathOps msg;
+	PgStatIOOpCounters *counters;
+	PgStat_Counter total = 0;
+
+	/* Nothing to report without a valid backend status entry. */
+	if (beentry == NULL || beentry->st_backendType == B_INVALID)
+		return;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.backend_type = beentry->st_backendType;
+
+	/* The message was just zeroed, so this copies the backend's counts. */
+	counters = msg.iop.io_path_ops;
+	pgstat_sum_io_path_ops(counters,
+						   (IOOpCounters *) &beentry->io_path_stats);
+
+	/* If no IO was done, don't bother sending anything to the stats collector. */
+	for (int i = 0; i < IOPATH_NUM_TYPES; i++)
+	{
+		PgStatIOOpCounters *cur = &counters[i];
+
+		total += cur->allocs + cur->extends + cur->fsyncs + cur->writes;
+	}
+
+	if (total == 0)
+		return;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_IO_PATH_OPS);
+	pgstat_send(&msg, sizeof(msg));
+}
+
 
 /*
  * Send out one statistics message to the collector
@@ -1588,6 +1635,29 @@ pgstat_send(void *msg, int len)
 #endif
 }
 
+/*
+ * Add the IO operation counts in src (one set of atomic counters per IOPath)
+ * onto the corresponding entries of dest (one set of plain counters per
+ * IOPath). Both arrays must have IOPATH_NUM_TYPES entries.
+ *
+ * Counts are accumulated, not assigned, so a caller that wants a plain copy
+ * of src must zero dest first.
+ */
+void
+pgstat_sum_io_path_ops(PgStatIOOpCounters *dest, IOOpCounters *src)
+{
+	for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+	{
+		IOOpCounters *from = &src[io_path];
+		PgStatIOOpCounters *to = &dest[io_path];
+
+		to->allocs += pg_atomic_read_u64(&from->allocs);
+		to->extends += pg_atomic_read_u64(&from->extends);
+		to->fsyncs += pg_atomic_read_u64(&from->fsyncs);
+		to->writes += pg_atomic_read_u64(&from->writes);
+	}
+}
+
 /*
  * Start up the statistics collector process.  This is the body of the
  * postmaster child process.
@@ -1798,6 +1868,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_checkpointer(&msg.msg_checkpointer, len);
 					break;
 
+				case PGSTAT_MTYPE_IO_PATH_OPS:
+					pgstat_recv_io_path_ops(&msg.msg_io_path_ops, len);
+					break;
+
 				case PGSTAT_MTYPE_WAL:
 					pgstat_recv_wal(&msg.msg_wal, len);
 					break;
@@ -3961,6 +4035,28 @@ pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len)
 	globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend;
 }
 
+/*
+ * Process an IO path ops message.
+ *
+ * Adds the IO operation counts reported in the message to the running totals
+ * kept in globalStats for the sending backend's type.
+ */
+static void
+pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len)
+{
+	int			idx = backend_type_get_idx(msg->backend_type);
+	PgStatIOOpCounters *totals = globalStats.buffers.ops[idx].io_path_ops;
+	PgStatIOOpCounters *reported = msg->iop.io_path_ops;
+
+	for (int i = 0; i < IOPATH_NUM_TYPES; i++)
+	{
+		totals[i].allocs += reported[i].allocs;
+		totals[i].extends += reported[i].extends;
+		totals[i].fsyncs += reported[i].fsyncs;
+		totals[i].writes += reported[i].writes;
+	}
+}
+
 /*
  * Process a WAL message.
  */
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 94c6135e930..77c89134c21 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -338,6 +338,8 @@ typedef enum BackendType
 	B_WAL_WRITER,
 } BackendType;
 
+#define BACKEND_NUM_TYPES B_WAL_WRITER
+
 extern BackendType MyBackendType;
 
 extern const char *GetBackendTypeDesc(BackendType backendType);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3584078f6ea..cdb2ce60c46 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -239,6 +239,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_ARCHIVER,
 	PGSTAT_MTYPE_BGWRITER,
 	PGSTAT_MTYPE_CHECKPOINTER,
+	PGSTAT_MTYPE_IO_PATH_OPS,
 	PGSTAT_MTYPE_WAL,
 	PGSTAT_MTYPE_SLRU,
 	PGSTAT_MTYPE_FUNCSTAT,
@@ -372,6 +373,49 @@ typedef struct PgStat_MsgDropdb
 	Oid			m_databaseid;
 } PgStat_MsgDropdb;
 
+
+/*
+ * Structure for counting all types of IOOps in the stats collector
+ */
+typedef struct PgStatIOOpCounters
+{
+	PgStat_Counter allocs;
+	PgStat_Counter extends;
+	PgStat_Counter fsyncs;
+	PgStat_Counter writes;
+} PgStatIOOpCounters;
+
+/*
+ * Structure for counting all IOOps on all types of IOPaths.
+ */
+typedef struct PgStatIOPathOps
+{
+	PgStatIOOpCounters io_path_ops[IOPATH_NUM_TYPES];
+} PgStatIOPathOps;
+
+/*
+ * Sent by a backend to the stats collector to report all IOOps for all IOPaths
+ * for a given type of a backend. This will happen when the backend exits.
+ */
+typedef struct PgStat_MsgIOPathOps
+{
+	PgStat_MsgHdr m_hdr;
+
+	BackendType backend_type;
+	PgStatIOPathOps iop;
+} PgStat_MsgIOPathOps;
+
+/*
+ * Structure used by the stats collector to keep track of the IOOps done on
+ * all IOPaths by exited backends. Counts are kept per backend type: ops has
+ * one entry for each valid BackendType (there is none for B_INVALID), so it
+ * is indexed with backend_type_get_idx().
+ */
+typedef struct PgStat_BackendIOPathOps
+{
+	PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+} PgStat_BackendIOPathOps;
+
 /* ----------
  * PgStat_MsgResetcounter		Sent by the backend to tell the collector
  *								to reset counters
@@ -750,6 +794,7 @@ typedef union PgStat_Msg
 	PgStat_MsgArchiver msg_archiver;
 	PgStat_MsgBgWriter msg_bgwriter;
 	PgStat_MsgCheckpointer msg_checkpointer;
+	PgStat_MsgIOPathOps msg_io_path_ops;
 	PgStat_MsgWal msg_wal;
 	PgStat_MsgSLRU msg_slru;
 	PgStat_MsgFuncstat msg_funcstat;
@@ -869,6 +914,7 @@ typedef struct PgStat_GlobalStats
 
 	PgStat_CheckpointerStats checkpointer;
 	PgStat_BgWriterStats bgwriter;
+	PgStat_BackendIOPathOps buffers;
 } PgStat_GlobalStats;
 
 typedef struct PgStat_StatReplSlotEntry
@@ -1120,6 +1166,20 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 									  void *recdata, uint32 len);
 
 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
+extern void pgstat_send_archiver(const char *xlog, bool failed);
+extern void pgstat_send_bgwriter(void);
+
+/*
+ * While some processes send some types of statistics to the collector at
+ * regular intervals (e.g. CheckpointerMain() calling
+ * pgstat_send_checkpointer()), IO operations stats are only sent by
+ * pgstat_send_buffers() when a process exits (in pgstat_shutdown_hook()). IO
+ * operations stats from live backends can be read from their PgBackendStatuses
+ * and, if desired, summed with totals from exited backends persisted by the
+ * stats collector.
+ */
+extern void pgstat_send_buffers(void);
+extern void pgstat_sum_io_path_ops(PgStatIOOpCounters *dest, IOOpCounters *src);
 
 
 /*
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 950b7396a59..3de1e7c8d37 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -331,6 +331,43 @@ extern void CreateSharedBackendStatus(void);
  * ----------
  */
 
+/* Utility functions */
+
+/*
+ * Map a valid BackendType to its zero-based position in an array that holds
+ * one entry per valid BackendType. Such arrays keep no entry for B_INVALID,
+ * so that the 0th spot is not wasted, which shifts every valid type down by
+ * one.
+ */
+static inline int
+backend_type_get_idx(BackendType backend_type)
+{
+	/*
+	 * Only valid backend types have a slot. Arrays that do include a slot
+	 * for B_INVALID can be indexed by BackendType directly instead.
+	 */
+	Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES);
+	return backend_type - 1;
+}
+
+/*
+ * Inverse of backend_type_get_idx(): convert a position in an array holding
+ * one entry per valid BackendType back to the BackendType it stands for.
+ * Since such arrays keep no spot for B_INVALID, that is the position plus
+ * one.
+ */
+static inline BackendType
+idx_get_backend_type(int idx)
+{
+	int			backend_type = idx + 1;
+
+	/* Not needed for arrays that do keep a spot for B_INVALID. */
+	Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES);
+	return (BackendType) backend_type;
+}
+
+extern const char *GetIOPathDesc(IOPath io_path);
+
 /* Initialization functions */
 extern void pgstat_beinit(void);
 extern void pgstat_bestart(void);
-- 
2.17.1

