From 8fbf5e027dfbcc9fd3ebc52cb82000a855c23064 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 13 Jun 2024 13:32:22 +0900
Subject: [PATCH 3/6] Introduce pluggable APIs for Cumulative Statistics

This commit adds support in the backend for $subject, allowing
out-of-core extensions to add their own custom statistics kinds.
---
 src/include/pgstat.h                          |   2 +
 src/include/storage/lwlocklist.h              |   1 +
 src/include/utils/pgstat_internal.h           |   1 +
 src/backend/storage/ipc/ipci.c                |   2 +
 src/backend/utils/activity/pgstat.c           | 301 +++++++++++++++++-
 .../utils/activity/wait_event_names.txt       |   1 +
 src/tools/pgindent/typedefs.list              |   3 +
 7 files changed, 308 insertions(+), 3 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 2d30fadaf1..b3cdc0da6d 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -463,6 +463,8 @@ typedef struct PgStat_PendingWalStats
 /* functions called from postmaster */
 extern Size StatsShmemSize(void);
 extern void StatsShmemInit(void);
+extern Size StatsKindShmemSize(void);
+extern void StatsKindShmemInit(void);
 
 /* Functions called during server startup / shutdown */
 extern void pgstat_restore_stats(void);
diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h
index 85f6568b9e..ed78f93683 100644
--- a/src/include/storage/lwlocklist.h
+++ b/src/include/storage/lwlocklist.h
@@ -83,3 +83,4 @@ PG_LWLOCK(49, WALSummarizer)
 PG_LWLOCK(50, DSMRegistry)
 PG_LWLOCK(51, InjectionPoint)
 PG_LWLOCK(52, SerialControl)
+PG_LWLOCK(53, PgStatKind)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index dbbca31602..21dfff740d 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -503,6 +503,7 @@ static inline void *pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common
  */
 
 extern const PgStat_KindInfo *pgstat_get_kind_info(PgStat_Kind kind);
+extern PgStat_Kind pgstat_add_kind(const PgStat_KindInfo *kind_info);
 
 #ifdef USE_ASSERT_CHECKING
 extern void pgstat_assert_is_up(void);
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 521ed5418c..8b5023d9de 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -149,6 +149,7 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, SyncScanShmemSize());
 	size = add_size(size, AsyncShmemSize());
 	size = add_size(size, StatsShmemSize());
+	size = add_size(size, StatsKindShmemSize());
 	size = add_size(size, WaitEventExtensionShmemSize());
 	size = add_size(size, InjectionPointShmemSize());
 	size = add_size(size, SlotSyncShmemSize());
@@ -355,6 +356,7 @@ CreateOrAttachShmemStructs(void)
 	SyncScanShmemInit();
 	AsyncShmemInit();
 	StatsShmemInit();
+	StatsKindShmemInit();
 	WaitEventExtensionShmemInit();
 	InjectionPointShmemInit();
 }
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index f03fee7cd5..b96743ce84 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -58,6 +58,12 @@
  * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in
  * pgStatLocal.snapshot.
  *
+ * It is possible for out-of-core modules to define custom statistics kinds,
+ * that can use the same properties as any in-core stats kinds.  Each custom
+ * kind is assigned a unique PgStat_Kind stored in shared memory with the
+ * name of the statistics kind.  Each PgStat_KindInfo is maintained in a
+ * local array cache known to the current process.
+ *
  * To keep things manageable, stats handling is split across several
  * files. Infrastructure pieces are in:
  * - pgstat.c - this file, to tie it all together
@@ -94,13 +100,18 @@
 #include <unistd.h>
 
 #include "access/xact.h"
+#include "common/int.h"
 #include "lib/dshash.h"
 #include "pgstat.h"
 #include "port/atomics.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
+#include "storage/shmem.h"
+#include "storage/spin.h"
+#include "storage/s_lock.h"
 #include "utils/guc_hooks.h"
+#include "utils/hsearch.h"
 #include "utils/memutils.h"
 #include "utils/pgstat_internal.h"
 #include "utils/timestamp.h"
@@ -400,6 +411,121 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {
 };
 
 
+/* --------
+ * Hash tables for storing custom pgstats kinds
+ *
+ * PgStatKindHashById is used to find the name from a PgStat_Kind.
+ * Any backend can search it to find custom stats kinds.
+ *
+ * PgStatKindHashByName is used to find the PgStat_Kind from a name.
+ * It is used to ensure that no duplicated entries are registered.
+ *
+ * The size of the hash table is based on the assumption that
+ * PGSTAT_KIND_HASH_INIT_SIZE is enough for most cases, and it seems
+ * unlikely that the number of entries will reach
+ * PGSTAT_KIND_HASH_MAX_SIZE.
+ * --------
+ */
+
+static HTAB *PgStatKindHashById;	/* find stats kind names from IDs */
+static HTAB *PgStatKindHashByName;	/* find PgStat_Kind from names */
+
+#define PGSTAT_KIND_HASH_INIT_SIZE 16
+#define PGSTAT_KIND_HASH_MAX_SIZE  128
+
+/* hash table entries */
+typedef struct PgStatKindEntryById
+{
+	PgStat_Kind kind;			/* hash key */
+	char		kind_name[NAMEDATALEN]; /* stats kind name */
+} PgStatKindEntryById;
+
+typedef struct PgStatKindEntryByName
+{
+	char		kind_name[NAMEDATALEN]; /* hash key */
+	PgStat_Kind kind;			/* kind ID */
+} PgStatKindEntryByName;
+
+/* dynamic allocation counter for custom pgstats kinds */
+typedef struct PgStatKindCounterData
+{
+	int			nextId;			/* next ID to assign */
+	slock_t		mutex;			/* protects the counter */
+} PgStatKindCounterData;
+
+/* pointer to the counter in shared memory */
+static PgStatKindCounterData *PgStatKindCounter;
+
+/* first ID of custom pgstats kinds, as stored in pgstats tables */
+#define PGSTAT_KIND_INITIAL_ID (PGSTAT_KIND_LAST + 1)
+
+/*
+ * Local array cache of the custom PgStat_KindInfos known to the current
+ * process, indexed by kind ID minus PGSTAT_KIND_INITIAL_ID; unused entries
+ * are NULL.  PgStatKindCacheNum is the allocated length of the array.
+ */
+static const PgStat_KindInfo **PgStatKindCache = NULL;
+static uint32 PgStatKindCacheNum = 0;
+
+/* -----------------------------------------------------------
+ * Functions managing the shared memory for custom stats kinds
+ * -----------------------------------------------------------
+ */
+
+/*
+ * Report the shared memory needed for the custom stats kind ID counter and
+ * the two lookup hash tables, each sized for PGSTAT_KIND_HASH_MAX_SIZE.
+ */
+Size
+StatsKindShmemSize(void)
+{
+	Size		by_id = hash_estimate_size(PGSTAT_KIND_HASH_MAX_SIZE,
+										   sizeof(PgStatKindEntryById));
+	Size		by_name = hash_estimate_size(PGSTAT_KIND_HASH_MAX_SIZE,
+											 sizeof(PgStatKindEntryByName));
+	Size		counter = MAXALIGN(sizeof(PgStatKindCounterData));
+
+	return add_size(add_size(counter, by_id), by_name);
+}
+
+/*
+ * Set up (or attach to) the shared counter and hash tables of custom kinds
+ */
+void
+StatsKindShmemInit(void)
+{
+	HASHCTL		by_id_ctl;
+	HASHCTL		by_name_ctl;
+	bool		exists;
+
+	PgStatKindCounter = (PgStatKindCounterData *)
+		ShmemInitStruct("PgStatKindCounterData",
+						sizeof(*PgStatKindCounter), &exists);
+
+	/* Not found: set up the spinlock and the first ID to hand out */
+	if (!exists)
+	{
+		SpinLockInit(&PgStatKindCounter->mutex);
+		PgStatKindCounter->nextId = PGSTAT_KIND_INITIAL_ID;
+	}
+
+	/* Table keyed by the binary PgStat_Kind value */
+	by_id_ctl.keysize = sizeof(PgStat_Kind);
+	by_id_ctl.entrysize = sizeof(PgStatKindEntryById);
+	PgStatKindHashById =
+		ShmemInitHash("PgStatKind hash by id", PGSTAT_KIND_HASH_INIT_SIZE,
+					  PGSTAT_KIND_HASH_MAX_SIZE, &by_id_ctl,
+					  HASH_ELEM | HASH_BLOBS);
+
+	/* Table keyed by the NUL-terminated kind name */
+	by_name_ctl.keysize = NAMEDATALEN;
+	by_name_ctl.entrysize = sizeof(PgStatKindEntryByName);
+	PgStatKindHashByName =
+		ShmemInitHash("PgStatKind hash by name", PGSTAT_KIND_HASH_INIT_SIZE,
+					  PGSTAT_KIND_HASH_MAX_SIZE, &by_name_ctl,
+					  HASH_ELEM | HASH_STRINGS);
+}
+
 /* ------------------------------------------------------------
  * Functions managing the state of the stats system for all backends.
  * ------------------------------------------------------------
@@ -1254,6 +1380,18 @@ pgstat_get_kind_from_str(char *kind_str)
 			return kind;
 	}
 
+	/* Check the local cache; valid slots are [0, PgStatKindCacheNum) */
+	if (PgStatKindCacheNum > 0)
+	{
+		for (uint32 idx = 0; idx < PgStatKindCacheNum; idx++)
+		{
+			if (PgStatKindCache[idx] == NULL)
+				continue;
+			if (pg_strcasecmp(kind_str, PgStatKindCache[idx]->name) == 0)
+				return idx + PGSTAT_KIND_INITIAL_ID;
+		}
+	}
+
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 			 errmsg("invalid statistics kind: \"%s\"", kind_str)));
@@ -1263,7 +1401,8 @@ pgstat_get_kind_from_str(char *kind_str)
 static inline bool
 pgstat_is_kind_valid(PgStat_Kind kind)
 {
-	return kind >= PGSTAT_KIND_FIRST_VALID && kind <= PGSTAT_KIND_LAST;
+	return kind >= PGSTAT_KIND_FIRST_VALID &&
+		kind <= (PGSTAT_KIND_LAST + PgStatKindCacheNum);
 }
 
 const PgStat_KindInfo *
@@ -1271,7 +1410,152 @@ pgstat_get_kind_info(PgStat_Kind kind)
 {
 	Assert(pgstat_is_kind_valid(kind));
 
-	return &pgstat_kind_infos[kind];
+	if (kind >= PGSTAT_KIND_FIRST_VALID && kind <= PGSTAT_KIND_LAST)
+		return &pgstat_kind_infos[kind];
+
+	/* Custom kinds: valid cache slots are [0, PgStatKindCacheNum) */
+	if (kind >= PGSTAT_KIND_INITIAL_ID &&
+		kind < PGSTAT_KIND_INITIAL_ID + PgStatKindCacheNum)
+		return PgStatKindCache[kind - PGSTAT_KIND_INITIAL_ID];
+
+	Assert(false);
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * Record kind_info in the local cache slot of a custom kind, growing the
+ * cache as needed.  In-core kinds are ignored.
+ */
+static void
+pgstat_save_kind_info(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
+{
+	uint32		slot;
+
+	/* Nothing to cache for the built-in stats kinds */
+	if (kind <= PGSTAT_KIND_LAST)
+		return;
+
+	/* Position of this kind in the cache array */
+	slot = kind - PGSTAT_KIND_INITIAL_ID;
+
+	if (slot >= PgStatKindCacheNum)
+	{
+		uint32		wanted;
+
+		/*
+		 * Grow to exactly slot + 1 entries so that PgStatKindCacheNum stays
+		 * an exact bound.  IDs are capped by PGSTAT_KIND_HASH_MAX_SIZE, so
+		 * this addition should not overflow, but let's be safe.
+		 */
+		if (pg_add_u32_overflow(slot, 1, &wanted))
+			elog(ERROR, "could not allocate memory for custom pgstats");
+
+		/* New slots are zeroed, so not-yet-known kinds read as NULL */
+		if (PgStatKindCache != NULL)
+			PgStatKindCache = repalloc0_array(PgStatKindCache,
+											  const PgStat_KindInfo *,
+											  PgStatKindCacheNum, wanted);
+		else
+			PgStatKindCache = (const PgStat_KindInfo **)
+				MemoryContextAllocZero(TopMemoryContext,
+									   wanted * sizeof(PgStat_KindInfo *));
+		PgStatKindCacheNum = wanted;
+	}
+
+	PgStatKindCache[slot] = kind_info;
+}
+
+/*
+ * Register a custom stats kind and return its PgStat_Kind.
+ *
+ * If the name is already registered in shared memory, its existing ID is
+ * returned; otherwise a new ID is allocated.  Either way, kind_info is saved
+ * in this process' local cache.  Raises an ERROR if the name does not fit in
+ * NAMEDATALEN, if fixed_amount is set, or if no more IDs are available.
+ */
+PgStat_Kind
+pgstat_add_kind(const PgStat_KindInfo *kind_info)
+{
+	PgStat_Kind kind;
+	bool		found;
+	PgStatKindEntryByName *entry_by_name;
+	PgStatKindEntryById *entry_by_id;
+
+	if (strlen(kind_info->name) >= NAMEDATALEN)
+		elog(ERROR,
+			 "cannot use custom stats kind longer than %u characters",
+			 NAMEDATALEN - 1);
+
+	/*
+	 * These are not supported for now, as these point out to fixed areas of
+	 * shared memory.
+	 */
+	if (kind_info->fixed_amount)
+		elog(ERROR,
+			 "cannot define custom stats kind with fixed amount of data");
+
+	/* Fast path: return the ID already associated with this name, if any */
+	LWLockAcquire(PgStatKindLock, LW_SHARED);
+	entry_by_name = (PgStatKindEntryByName *)
+		hash_search(PgStatKindHashByName, kind_info->name,
+					HASH_FIND, &found);
+	LWLockRelease(PgStatKindLock);
+
+	if (found)
+	{
+		pgstat_save_kind_info(entry_by_name->kind, kind_info);
+		return entry_by_name->kind;
+	}
+
+	/*
+	 * Not found.  Take the lock exclusively and recheck, as a concurrent
+	 * process may have registered the same name since the lock was released.
+	 */
+	LWLockAcquire(PgStatKindLock, LW_EXCLUSIVE);
+	entry_by_name = (PgStatKindEntryByName *)
+		hash_search(PgStatKindHashByName, kind_info->name,
+					HASH_FIND, &found);
+	if (found)
+	{
+		LWLockRelease(PgStatKindLock);
+		pgstat_save_kind_info(entry_by_name->kind, kind_info);
+		return entry_by_name->kind;
+	}
+
+	/* Allocate a new kind ID */
+	SpinLockAcquire(&PgStatKindCounter->mutex);
+	/* nextId starts at PGSTAT_KIND_INITIAL_ID, so count the IDs handed out */
+	if (PgStatKindCounter->nextId - PGSTAT_KIND_INITIAL_ID >= PGSTAT_KIND_HASH_MAX_SIZE)
+	{
+		SpinLockRelease(&PgStatKindCounter->mutex);
+		ereport(ERROR,
+				errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				errmsg("too many stats kinds"));
+	}
+
+	kind = PgStatKindCounter->nextId;
+	PgStatKindCounter->nextId++;
+
+	SpinLockRelease(&PgStatKindCounter->mutex);
+
+	/* Register the new stats kind */
+	entry_by_id = (PgStatKindEntryById *)
+		hash_search(PgStatKindHashById, &kind,
+					HASH_ENTER, &found);
+	Assert(!found);
+	strlcpy(entry_by_id->kind_name, kind_info->name,
+			sizeof(entry_by_id->kind_name));
+
+	entry_by_name = (PgStatKindEntryByName *)
+		hash_search(PgStatKindHashByName, kind_info->name,
+					HASH_ENTER, &found);
+	Assert(!found);
+	entry_by_name->kind = kind;
+
+	LWLockRelease(PgStatKindLock);
+
+	pgstat_save_kind_info(kind, kind_info);
+	return kind;
 }
 
 /*
@@ -1405,6 +1689,17 @@ pgstat_write_statsfile(void)
 		if (ps->dropped)
 			continue;
 
+		/*
+		 * This discards data related to custom stats kinds that are unknown
+		 * to this process.
+		 */
+		if (!pgstat_is_kind_valid(ps->key.kind))
+		{
+			elog(WARNING, "found unknown stats entry %u/%u/%u",
+				 ps->key.kind, ps->key.dboid, ps->key.objoid);
+			continue;
+		}
+
 		shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);
 
 		kind_info = pgstat_get_kind_info(ps->key.kind);
@@ -1639,7 +1934,7 @@ pgstat_read_statsfile(void)
 					if (found)
 					{
 						dshash_release_lock(pgStatLocal.shared_hash, p);
-						elog(WARNING, "found duplicate stats entry %d/%u/%u",
+						elog(WARNING, "found duplicate stats entry %u/%u/%u",
 							 key.kind, key.dboid, key.objoid);
 						goto error;
 					}
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 87cbca2811..32d6d8fd74 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -345,6 +345,7 @@ WALSummarizer	"Waiting to read or update WAL summarization state."
 DSMRegistry	"Waiting to read or update the dynamic shared memory registry."
 InjectionPoint	"Waiting to read or update information related to injection points."
 SerialControl	"Waiting to read or update shared <filename>pg_serial</filename> state."
+PgStatKind	"Waiting to read or update custom pgstats kind information."
 
 #
 # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE)
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4f57078d13..8718ca54e0 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2109,6 +2109,9 @@ PgFdwRelationInfo
 PgFdwSamplingMethod
 PgFdwScanState
 PgIfAddrCallback
+PgStatKindCounterData
+PgStatKindEntryById
+PgStatKindEntryByName
 PgStatShared_Archiver
 PgStatShared_BgWriter
 PgStatShared_Checkpointer
-- 
2.43.0

