From 59a118402e5e59685fb9e0fb086872e25a405736 Mon Sep 17 00:00:00 2001
From: Masahiro Ikeda <masahiro.ikeda.us@hco.ntt.co.jp>
Date: Thu, 15 Jun 2023 12:57:29 +0900
Subject: [PATCH 2/3] Support to define custom wait events for extensions.

Currently, only one PG_WAIT_EXTENSION event can be used as a
wait event for extensions. Therefore, in environments where multiple
extensions are installed, it can take time to identify bottlenecks.

This commit allows defining custom wait events for extensions and
introduces a new API called RequestNamedExtensionWaitEventTranche()/
GetNamedExtensionWaitEventTranche().

These are modeled on RequestNamedLWLockTranche()/GetNamedLWLockTranche(),
but since they do not need as much flexibility as LWLocks, they can be
implemented more simply.

The extension calls RequestNamedExtensionWaitEventTranche() in
shmem_request_hook() to request wait events to be used by each extension.
In the core, the requested wait events are dynamically registered in shared
memory. The extension then obtains the wait event information with
GetNamedExtensionWaitEventTranche() and uses the value to notify the core
that it is waiting.
---
 src/backend/postmaster/postmaster.c     |   6 +
 src/backend/storage/ipc/ipci.c          |   3 +
 src/backend/storage/ipc/shmem.c         |   3 +-
 src/backend/utils/activity/wait_event.c | 303 +++++++++++++++++++++++-
 src/include/utils/wait_event.h          |  32 +++
 5 files changed, 345 insertions(+), 2 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 4c49393fc5..50afa3aa14 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -516,6 +516,8 @@ typedef struct
 	int			NamedLWLockTrancheRequests;
 	NamedLWLockTranche *NamedLWLockTrancheArray;
 	LWLockPadded *MainLWLockArray;
+	int			NamedExtensionWaitEventTrancheRequests;
+	NamedExtensionWaitEventTranche *NamedExtensionWaitEventTrancheArray;
 	slock_t    *ProcStructLock;
 	PROC_HDR   *ProcGlobal;
 	PGPROC	   *AuxiliaryProcs;
@@ -6087,6 +6089,8 @@ save_backend_variables(BackendParameters *param, Port *port,
 	param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
 	param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
 	param->MainLWLockArray = MainLWLockArray;
+	param->NamedExtensionWaitEventTrancheRequests = NamedExtensionWaitEventTrancheRequests;
+	param->NamedExtensionWaitEventTrancheArray = NamedExtensionWaitEventTrancheArray;
 	param->ProcStructLock = ProcStructLock;
 	param->ProcGlobal = ProcGlobal;
 	param->AuxiliaryProcs = AuxiliaryProcs;
@@ -6320,6 +6324,8 @@ restore_backend_variables(BackendParameters *param, Port *port)
 	NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
 	NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
 	MainLWLockArray = param->MainLWLockArray;
+	NamedExtensionWaitEventTrancheRequests = param->NamedExtensionWaitEventTrancheRequests;
+	NamedExtensionWaitEventTrancheArray = param->NamedExtensionWaitEventTrancheArray;
 	ProcStructLock = param->ProcStructLock;
 	ProcGlobal = param->ProcGlobal;
 	AuxiliaryProcs = param->AuxiliaryProcs;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 8f1ded7338..ed05121fa3 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -49,6 +49,7 @@
 #include "storage/spin.h"
 #include "utils/guc.h"
 #include "utils/snapmgr.h"
+#include "utils/wait_event.h"
 
 /* GUCs */
 int			shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -142,6 +143,7 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, SyncScanShmemSize());
 	size = add_size(size, AsyncShmemSize());
 	size = add_size(size, StatsShmemSize());
+	size = add_size(size, WaitEventShmemSize());
 #ifdef EXEC_BACKEND
 	size = add_size(size, ShmemBackendArraySize());
 #endif
@@ -294,6 +296,7 @@ CreateSharedMemoryAndSemaphores(void)
 	SyncScanShmemInit();
 	AsyncShmemInit();
 	StatsShmemInit();
+	WaitEventShmemInit();
 
 #ifdef EXEC_BACKEND
 
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 5465fa1964..02c72ebbb1 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -85,7 +85,8 @@ static void *ShmemBase;			/* start address of shared memory */
 
 static void *ShmemEnd;			/* end+1 address of shared memory */
 
-slock_t    *ShmemLock;			/* spinlock for shared memory and LWLock
+slock_t    *ShmemLock;			/* spinlock for shared memory, LWLock
+								 * allocation, and named extension wait event
 								 * allocation */
 
 static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index 7940d64639..4bf725fbae 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -22,11 +22,18 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
 #include "storage/lmgr.h"		/* for GetLockNameFromTagType */
 #include "storage/lwlock.h"		/* for GetLWLockIdentifier */
+#include "storage/spin.h"
+#include "utils/memutils.h"
 #include "utils/wait_event.h"
 
 
+/* We use the ShmemLock spinlock to protect ExtensionWaitEventCounter */
+extern slock_t *ShmemLock;
+
 static const char *pgstat_get_wait_activity(WaitEventActivity w);
 static const char *pgstat_get_wait_client(WaitEventClient w);
 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
@@ -38,6 +45,46 @@ static uint32 local_my_wait_event_info;
 uint32	   *my_wait_event_info = &local_my_wait_event_info;
 
 
+/* One extension's request, made at startup, for a named wait event tranche. */
+typedef struct NamedExtensionWaitEventTrancheRequest
+{
+	char		tranche_name[NAMEDATALEN];	/* copy of the requested name */
+}			NamedExtensionWaitEventTrancheRequest;
+
+static NamedExtensionWaitEventTrancheRequest * NamedExtensionWaitEventTrancheRequestArray = NULL;
+static int	NamedExtensionWaitEventTrancheRequestsAllocated = 0;
+
+/*
+ * NamedExtensionWaitEventTrancheRequests is both the valid length of the request array,
+ * and the length of the shared-memory NamedExtensionWaitEventTrancheArray later on.
+ * This variable and NamedExtensionWaitEventTrancheArray are non-static so that
+ * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
+ */
+int			NamedExtensionWaitEventTrancheRequests = 0;
+
+/* points to data in shared memory */
+NamedExtensionWaitEventTranche *NamedExtensionWaitEventTrancheArray = NULL;
+
+static void InitializeExtensionWaitEventTranches(void);
+static void ExtensionWaitEventRegisterTranche(int tranche_id, const char *tranche_name);
+static int	ExtensionWaitEventNewTrancheId(void);
+
+/* number of built-in extension wait events, i.e. the first named tranche ID */
+#define NUM_BUILDIN_WAIT_EVENT_EXTENSION	\
+	(WAIT_EVENT_EXTENSION_FIRST_USER_DEFINED - WAIT_EVENT_EXTENSION)
+
+/*
+ * This is indexed by tranche ID minus NUM_BUILDIN_WAIT_EVENT_EXTENSION, and
+ * stores the names of all dynamically-created tranches known to the current
+ * process.  Any unused entries in the array will contain NULL.
+ */
+static const char **ExtensionWaitEventTrancheNames = NULL;
+static int	ExtensionWaitEventTrancheNamesAllocated = 0;
+
+static const char *GetExtensionIdentifier(uint32 classId, uint16 eventId);
+static const char *GetExtensionTrancheName(uint16 trancheId);
+
+
 /*
  * Configure wait event reporting to report wait events to *wait_event_info.
  * *wait_event_info needs to be valid until pgstat_reset_wait_event_storage()
@@ -165,7 +212,7 @@ pgstat_get_wait_event(uint32 wait_event_info)
 				break;
 			}
 		case PG_WAIT_EXTENSION:
-			event_name = "Extension";
+			event_name = GetExtensionIdentifier(classId, eventId);
 			break;
 		case PG_WAIT_IPC:
 			{
@@ -762,3 +809,257 @@ pgstat_get_wait_io(WaitEventIO w)
 
 	return event_name;
 }
+
+/*
+ * RequestNamedExtensionWaitEventTranche
+ *		Ask for a custom extension wait event, identified by tranche_name,
+ *		to be set up during postmaster startup.
+ *
+ * Must be called from the shmem_request_hook of a library loaded via
+ * shared_preload_libraries; calls at any other time raise a FATAL error.
+ *
+ * The tranche name will be user-visible as a wait event name, so pick one
+ * in the style of the existing wait event names.
+ */
+void
+RequestNamedExtensionWaitEventTranche(const char *tranche_name)
+{
+	char	   *dest;
+
+	if (!process_shmem_requests_in_progress)
+		elog(FATAL, "cannot request additional wait events outside shmem_request_hook");
+
+	/* First request: create the array with room for 16 entries. */
+	if (NamedExtensionWaitEventTrancheRequestArray == NULL)
+	{
+		NamedExtensionWaitEventTrancheRequestsAllocated = 16;
+		NamedExtensionWaitEventTrancheRequestArray = (NamedExtensionWaitEventTrancheRequest *)
+			MemoryContextAlloc(TopMemoryContext,
+							   NamedExtensionWaitEventTrancheRequestsAllocated * sizeof(NamedExtensionWaitEventTrancheRequest));
+	}
+
+	/* Array full: grow it to a power of two that fits one more request. */
+	if (NamedExtensionWaitEventTrancheRequests >= NamedExtensionWaitEventTrancheRequestsAllocated)
+	{
+		int			newalloc = pg_nextpower2_32(NamedExtensionWaitEventTrancheRequests + 1);
+
+		NamedExtensionWaitEventTrancheRequestArray = (NamedExtensionWaitEventTrancheRequest *)
+			repalloc(NamedExtensionWaitEventTrancheRequestArray, newalloc * sizeof(NamedExtensionWaitEventTrancheRequest));
+		NamedExtensionWaitEventTrancheRequestsAllocated = newalloc;
+	}
+
+	/* Append the name; it must fit, NUL included, in NAMEDATALEN bytes. */
+	dest = NamedExtensionWaitEventTrancheRequestArray[NamedExtensionWaitEventTrancheRequests].tranche_name;
+	Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
+	strlcpy(dest, tranche_name, NAMEDATALEN);
+	NamedExtensionWaitEventTrancheRequests++;
+}
+
+/*
+ * Compute shmem space needed for named wait event tranches: the tranche ID
+ * counter WaitEventShmemInit() places first, the tranche array, the names.
+ */
+Size
+WaitEventShmemSize(void)
+{
+	/* Counter slot (WaitEventShmemInit skips sizeof(int)) + tranche array. */
+	Size		size = add_size(sizeof(int),
+								mul_size(NamedExtensionWaitEventTrancheRequests,
+										 sizeof(NamedExtensionWaitEventTranche)));
+
+	/* space for name of each tranche, including its terminating NUL. */
+	for (int i = 0; i < NamedExtensionWaitEventTrancheRequests; i++)
+		size = add_size(size, strlen(NamedExtensionWaitEventTrancheRequestArray[i].tranche_name) + 1);
+
+	return size;
+}
+
+/*
+ * Allocate shmem space for named wait event tranches and initialize it
+ * (only when not under the postmaster), then register every named tranche
+ * in the current process.
+ */
+void
+WaitEventShmemInit(void)
+{
+	if (!IsUnderPostmaster)
+	{
+		char	   *base = (char *) ShmemAlloc(WaitEventShmemSize());
+		int		   *counter = (int *) base;
+
+		/*
+		 * Layout: an int tranche ID counter first, then the tranche array.
+		 * NOTE(review): the array starts sizeof(int) past the ShmemAlloc()
+		 * result; confirm that satisfies the alignment of
+		 * NamedExtensionWaitEventTranche, which holds a pointer.
+		 */
+		NamedExtensionWaitEventTrancheArray =
+			(NamedExtensionWaitEventTranche *) (base + sizeof(int));
+
+		/* Tranche IDs handed out dynamically start after the built-in ones. */
+		*counter = NUM_BUILDIN_WAIT_EVENT_EXTENSION;
+
+		/* Initialize requested named wait event tranches */
+		InitializeExtensionWaitEventTranches();
+	}
+
+	/* Register named wait event tranches in the current process. */
+	for (int i = 0; i < NamedExtensionWaitEventTrancheRequests; i++)
+	{
+		NamedExtensionWaitEventTranche *tranche = &NamedExtensionWaitEventTrancheArray[i];
+
+		ExtensionWaitEventRegisterTranche(tranche->trancheId, tranche->trancheName);
+	}
+}
+
+/*
+ * Initialize requested named wait event tranches.
+ *
+ * Copies each requested name into the shared area that follows the tranche
+ * array (so that other processes can see it) and gives each tranche a newly
+ * allocated ID.
+ */
+static void
+InitializeExtensionWaitEventTranches(void)
+{
+	char	   *nameCursor;
+
+	/* Nothing to set up unless some extension asked for a tranche. */
+	if (NamedExtensionWaitEventTrancheRequests <= 0)
+		return;
+
+	/* Name strings are packed back to back right after the last array slot. */
+	nameCursor = (char *) NamedExtensionWaitEventTrancheArray +
+		(NamedExtensionWaitEventTrancheRequests * sizeof(NamedExtensionWaitEventTranche));
+
+	for (int idx = 0; idx < NamedExtensionWaitEventTrancheRequests; idx++)
+	{
+		const char *requested = NamedExtensionWaitEventTrancheRequestArray[idx].tranche_name;
+		NamedExtensionWaitEventTranche *slot = &NamedExtensionWaitEventTrancheArray[idx];
+		char	   *stored = nameCursor;
+
+		/* Advance past this name and its NUL before copying it in place. */
+		nameCursor += strlen(requested) + 1;
+		strcpy(stored, requested);
+
+		/* Hand out the next tranche ID and point the entry at the shared copy. */
+		slot->trancheId = ExtensionWaitEventNewTrancheId();
+		slot->trancheName = stored;
+	}
+}
+
+/*
+ * Allocate a new tranche ID by post-incrementing the shared counter stored
+ * just before NamedExtensionWaitEventTrancheArray, under ShmemLock.
+ */
+static int
+ExtensionWaitEventNewTrancheId(void)
+{
+	int		   *counter = (int *) ((char *) NamedExtensionWaitEventTrancheArray - sizeof(int));
+	int			id;
+
+	SpinLockAcquire(ShmemLock);
+	id = (*counter)++;
+	SpinLockRelease(ShmemLock);
+
+	return id;
+}
+
+/*
+ * Record tranche_name (stored by pointer, not copied) as the name of
+ * tranche_id in this process's lookup table; built-in IDs are ignored.
+ */
+static void
+ExtensionWaitEventRegisterTranche(int tranche_id, const char *tranche_name)
+{
+	int			slot = tranche_id - NUM_BUILDIN_WAIT_EVENT_EXTENSION;
+
+	/* Only user-defined tranches have a slot in the table. */
+	if (slot < 0)
+		return;
+
+	/* If necessary, create or enlarge array; new slots are zeroed (NULL). */
+	if (slot >= ExtensionWaitEventTrancheNamesAllocated)
+	{
+		int			newalloc = pg_nextpower2_32(Max(8, slot + 1));
+
+		if (ExtensionWaitEventTrancheNames == NULL)
+			ExtensionWaitEventTrancheNames = (const char **)
+				MemoryContextAllocZero(TopMemoryContext, newalloc * sizeof(char *));
+		else
+			ExtensionWaitEventTrancheNames =
+				repalloc0_array(ExtensionWaitEventTrancheNames, const char *, ExtensionWaitEventTrancheNamesAllocated, newalloc);
+		ExtensionWaitEventTrancheNamesAllocated = newalloc;
+	}
+
+	ExtensionWaitEventTrancheNames[slot] = tranche_name;
+}
+
+/*
+ * GetNamedExtensionWaitEventTranche - returns the wait event information
+ *		for the named extension wait event tranche.
+ *
+ * tranche_name must have been requested earlier with
+ * RequestNamedExtensionWaitEventTranche(); an ERROR is raised if no request
+ * with that name is found.
+ *
+ * The result is PG_WAIT_EXTENSION (the wait event class) OR'ed with the
+ * tranche ID recorded for that name.  It is a plain value, not an address,
+ * and there is exactly one wait event per tranche name; pass it as the
+ * wait_event_info when reporting the wait.
+ */
+uint32
+GetNamedExtensionWaitEventTranche(const char *tranche_name)
+{
+	/* The wait event type is always PG_WAIT_EXTENSION. */
+	uint32		wait_event_info = PG_WAIT_EXTENSION;
+
+	/*
+	 * The request array and the shared tranche array are filled in the same
+	 * order (InitializeExtensionWaitEventTranches walks both with one index),
+	 * so the position of the matching request is also that of its tranche.
+	 */
+	for (int i = 0; i < NamedExtensionWaitEventTrancheRequests; i++)
+	{
+		const char *requested = NamedExtensionWaitEventTrancheRequestArray[i].tranche_name;
+
+		if (strcmp(requested, tranche_name) != 0)
+			continue;
+
+		wait_event_info |= NamedExtensionWaitEventTrancheArray[i].trancheId;
+		return wait_event_info;
+	}
+
+	elog(ERROR, "requested tranche is not registered");
+
+	/* just to keep compiler quiet */
+	return wait_event_info;
+}
+
+/*
+ * Return the user-visible name for a wait event of class PG_WAIT_EXTENSION.
+ */
+static const char *
+GetExtensionIdentifier(uint32 classId, uint16 eventId)
+{
+	Assert(classId == PG_WAIT_EXTENSION);
+	/* The event ID is the tranche number, so look the name up by tranche. */
+	return GetExtensionTrancheName(eventId);
+}
+
+/*
+ * Return the name of an Extension tranche, or the generic "Extension" for a
+ * built-in ID or one this process has no name registered for.
+ */
+static const char *
+GetExtensionTrancheName(uint16 trancheId)
+{
+	int			idx = (int) trancheId - NUM_BUILDIN_WAIT_EVENT_EXTENSION;
+
+	/* Built-in ID, or never registered in this backend: generic name. */
+	if (idx < 0 || idx >= ExtensionWaitEventTrancheNamesAllocated ||
+		ExtensionWaitEventTrancheNames[idx] == NULL)
+		return "Extension";
+
+	return ExtensionWaitEventTrancheNames[idx];
+}
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 518d3b0a1f..fa42671bd5 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -70,6 +70,38 @@ typedef enum
 	WAIT_EVENT_WAL_SENDER_WRITE_DATA,
 } WaitEventClient;
 
+/* ----------
+ * Wait Events - Extension
+ *
+ * Use this category when the server process is waiting for some condition
+ * defined by an extension module.
+ *
+ * Extensions can define custom wait events by calling
+ * RequestNamedExtensionWaitEventTranche() from their shmem_request_hook, then
+ * GetNamedExtensionWaitEventTranche() to obtain the value to report.
+ * ----------
+ */
+typedef enum
+{
+	WAIT_EVENT_EXTENSION = PG_WAIT_EXTENSION,	/* generic, unnamed event */
+	WAIT_EVENT_EXTENSION_FIRST_USER_DEFINED		/* first named-event value */
+}			WaitEventExtension;
+
+extern void WaitEventShmemInit(void);
+extern Size WaitEventShmemSize(void);
+extern void RequestNamedExtensionWaitEventTranche(const char *tranche_name);
+extern uint32 GetNamedExtensionWaitEventTranche(const char *tranche_name);
+
+/* struct for storing named tranche information (one per requested name) */
+typedef struct NamedExtensionWaitEventTranche
+{
+	uint16		trancheId;		/* ID OR'ed with PG_WAIT_EXTENSION when reported */
+	char	   *trancheName;	/* name copied after the array at init time */
+}			NamedExtensionWaitEventTranche;
+
+extern PGDLLIMPORT NamedExtensionWaitEventTranche * NamedExtensionWaitEventTrancheArray;
+extern PGDLLIMPORT int NamedExtensionWaitEventTrancheRequests;
+
 /* ----------
  * Wait Events - IPC
  *
-- 
2.25.1

