From bb8e1a2bbb9425eb7667a97dc49beebe8f9bf327 Mon Sep 17 00:00:00 2001
From: Reid Thompson <jreidthompson@nc.rr.com>
Date: Sat, 4 Jun 2022 22:23:59 -0400
Subject: [PATCH 2/2] Add the ability to limit the amount of memory that can be
 allocated to backends.

This builds on the work that adds backend memory allocated to pg_stat_activity.

Add GUC variable max_total_backend_memory.

Specifies a limit to the amount of memory (in MB) that may be allocated to
backends in total (i.e. this is not a per user or per backend limit). If unset,
or set to 0 it is disabled. It is intended as a resource to help avoid the OOM
killer on LINUX and manage resources in general. A backend request that would
push the total over the limit will be denied with an out of memory error causing
that backend's current query/transaction to fail. Due to the dynamic nature of
memory allocations, this limit is not exact. If within 1.5MB of the limit and
two backends request 1MB each at the same time both may be allocated, and exceed
the limit. Further requests will not be allocated until dropping below the
limit. Keep this in mind when setting this value. This limit does not affect
auxiliary backend processes. Backend memory allocations are displayed in the
pg_stat_activity view.
---
 doc/src/sgml/config.sgml                      |  26 ++++
 src/backend/storage/ipc/dsm_impl.c            |  12 ++
 src/backend/utils/activity/backend_status.c   | 111 +++++++++++++++++-
 src/backend/utils/misc/guc_tables.c           |  11 ++
 src/backend/utils/misc/postgresql.conf.sample |   3 +
 src/backend/utils/mmgr/aset.c                 |  17 +++
 src/backend/utils/mmgr/generation.c           |  11 +-
 src/backend/utils/mmgr/slab.c                 |   8 ++
 src/include/utils/backend_status.h            |   2 +
 9 files changed, 199 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 700914684d..ce8e35daee 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2079,6 +2079,32 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-max-total-backend-memory" xreflabel="max_total_backend_memory">
+      <term><varname>max_total_backend_memory</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>max_total_backend_memory</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies a limit to the amount of memory (MB) that may be allocated to
+        backends in total (i.e. this is not a per user or per backend limit).
+        If unset, or set to 0 it is disabled.  A backend request that would
+        push the total over the limit will be denied with an out of memory
+        error causing that backend's current query/transaction to fail. Due to
+        the dynamic nature of memory allocations, this limit is not exact. If
+        within 1.5MB of the limit and two backends request 1MB each at the same
+        time both may be allocated, and exceed the limit. Further requests will
+        not be allocated until dropping below the limit. Keep this in mind when
+        setting this value. This limit does not affect auxiliary backend
+        processes <xref linkend="glossary-auxiliary-proc"/> . Backend memory
+        allocations (<varname>backend_mem_allocated</varname>) are displayed in
+        the <link linkend="monitoring-pg-stat-activity-view"><structname>pg_stat_activity</structname></link>
+        view.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
      </sect2>
 
diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c
index 3356bb65b5..cc061056a3 100644
--- a/src/backend/storage/ipc/dsm_impl.c
+++ b/src/backend/storage/ipc/dsm_impl.c
@@ -253,6 +253,10 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
 		return true;
 	}
 
+	/* Do not exceed maximum allowed memory allocation */
+	if (op == DSM_OP_CREATE && exceeds_max_total_bkend_mem(request_size))
+		return false;
+
 	/*
 	 * Create new segment or open an existing one for attach.
 	 *
@@ -524,6 +528,10 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
 		int			flags = IPCProtection;
 		size_t		segsize;
 
+		/* Do not exceed maximum allowed memory allocation */
+		if (op == DSM_OP_CREATE && exceeds_max_total_bkend_mem(request_size))
+			return false;
+
 		/*
 		 * Allocate the memory BEFORE acquiring the resource, so that we don't
 		 * leak the resource if memory allocation fails.
@@ -718,6 +726,10 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
 		return true;
 	}
 
+	/* Do not exceed maximum allowed memory allocation */
+	if (op == DSM_OP_CREATE && exceeds_max_total_bkend_mem(request_size))
+		return false;
+
 	/* Create new segment or open an existing one for attach. */
 	if (op == DSM_OP_CREATE)
 	{
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index 45da3af213..7820c55489 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -45,6 +45,9 @@
 bool		pgstat_track_activities = false;
 int			pgstat_track_activity_query_size = 1024;
 
+/* Max backend memory allocation allowed (MB). 0 = disabled */
+int			max_total_bkend_mem = 0;
+
 
 /* exposed so that backend_progress.c can access it */
 PgBackendStatus *MyBEEntry = NULL;
@@ -1235,7 +1238,7 @@ pgstat_report_backend_mem_allocated_decrease(uint64 deallocation)
 	 */
 	if (deallocation > beentry->backend_mem_allocated)
 	{
-		ereport(LOG, errmsg("decrease reduces reported backend memory allocated below zero; setting reported to 0"));
+		ereport(LOG, errmsg("deallocation would decrease backend memory below zero; setting reported to 0"));
 
 		/*
 		 * Overwrite deallocation with current backend_mem_allocated so we end
@@ -1253,3 +1256,109 @@ pgstat_report_backend_mem_allocated_decrease(uint64 deallocation)
 	beentry->backend_mem_allocated -= deallocation;
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
+
+/* ----------
+ * pgstat_get_all_backend_memory_allocated() -
+ *
+ *	Return a uint64 representing the current shared memory allocated to all
+ *	backends.  This looks directly at the BackendStatusArray, and so will
+ *	provide current information regardless of the age of our transaction's
+ *	snapshot of the status array.
+ *	In the future we will likely utilize additional values - perhaps limit
+ *	backend allocation by user/role, etc.
+ * ----------
+ */
+uint64
+pgstat_get_all_backend_memory_allocated(void)
+{
+	PgBackendStatus *beentry;
+	int			i;
+	uint64		all_backend_memory_allocated = 0;
+
+	beentry = BackendStatusArray;
+
+	/*
+	 * We probably shouldn't get here before shared memory has been set up,
+	 * but be safe.
+	 */
+	if (beentry == NULL || BackendActivityBuffer == NULL)
+		return 0;
+
+	/*
+	 * We include AUX procs in all backend memory calculation
+	 */
+	for (i = 1; i <= NumBackendStatSlots; i++)
+	{
+		/*
+		 * We use a volatile pointer here to ensure the compiler doesn't try
+		 * to get cute.
+		 */
+		volatile PgBackendStatus *vbeentry = beentry;
+		bool		found;
+		uint64		backend_mem_allocated = 0;
+
+		for (;;)
+		{
+			int			before_changecount;
+			int			after_changecount;
+
+			pgstat_begin_read_activity(vbeentry, before_changecount);
+
+			/*
+			 * Ignore invalid entries, which may contain invalid data.
+			 * See pgstat_beshutdown_hook()
+			 */
+			if (vbeentry->st_procpid > 0)
+				backend_mem_allocated = vbeentry->backend_mem_allocated;
+
+			pgstat_end_read_activity(vbeentry, after_changecount);
+
+			if ((found = pgstat_read_activity_complete(before_changecount,
+													   after_changecount)))
+				break;
+
+			/* Make sure we can break out of loop if stuck... */
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		if (found)
+			all_backend_memory_allocated += backend_mem_allocated;
+
+		beentry++;
+	}
+
+	return all_backend_memory_allocated;
+}
+
+/*
+ * Determine if allocation request will exceed max backend memory allowed.
+ * Do not apply to auxiliary processes.
+ */
+bool
+exceeds_max_total_bkend_mem(uint64 allocation_request)
+{
+	bool		result = false;
+
+	/* Exclude auxiliary processes from the check */
+	if (MyAuxProcType != NotAnAuxProcess)
+		return result;
+
+	/* Convert max_total_bkend_mem to bytes for comparison */
+	if (max_total_bkend_mem &&
+		pgstat_get_all_backend_memory_allocated() +
+		allocation_request > (uint64) max_total_bkend_mem * 1024 * 1024)
+	{
+		/*
+		 * Explicitly identify the OOM being a result of this configuration
+		 * parameter vs a system failure to allocate OOM.
+		 */
+		ereport(WARNING,
+				errmsg("allocation would exceed max_total_backend_memory limit (%llu > %llu)",
+					   (unsigned long long) pgstat_get_all_backend_memory_allocated() +
+					   allocation_request, (unsigned long long) max_total_bkend_mem * 1024 * 1024));
+
+		result = true;
+	}
+
+	return result;
+}
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 550e95056c..7d4dc8677a 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3415,6 +3415,17 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"max_total_backend_memory", PGC_SU_BACKEND, RESOURCES_MEM,
+			gettext_noop("Restrict total backend memory allocations to this max."),
+			gettext_noop("0 turns this feature off."),
+			GUC_UNIT_MB
+		},
+		&max_total_bkend_mem,
+		0, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2ae76e5cfb..8a0b383eb7 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -155,6 +155,9 @@
 					#   mmap
 					# (change requires restart)
 #min_dynamic_shared_memory = 0MB	# (change requires restart)
+#max_total_backend_memory = 0MB		# Restrict total backend memory allocations
+									# to this max (in MB). 0 turns this feature
+									# off.
 
 # - Disk -
 
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 2ac6f10e12..509b93faa1 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -428,6 +428,10 @@ AllocSetContextCreateInternal(MemoryContext parent,
 	else
 		firstBlockSize = Max(firstBlockSize, initBlockSize);
 
+	/* Do not exceed maximum allowed memory allocation */
+	if (exceeds_max_total_bkend_mem(firstBlockSize))
+		return NULL;
+
 	/*
 	 * Allocate the initial block.  Unlike other aset.c blocks, it starts with
 	 * the context header and its block header follows that.
@@ -726,6 +730,11 @@ AllocSetAlloc(MemoryContext context, Size size)
 #endif
 
 		blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
+
+		/* Do not exceed maximum allowed memory allocation */
+		if (exceeds_max_total_bkend_mem(blksize))
+			return NULL;
+
 		block = (AllocBlock) malloc(blksize);
 		if (block == NULL)
 			return NULL;
@@ -923,6 +932,10 @@ AllocSetAlloc(MemoryContext context, Size size)
 		while (blksize < required_size)
 			blksize <<= 1;
 
+		/* Do not exceed maximum allowed memory allocation */
+		if (exceeds_max_total_bkend_mem(blksize))
+			return NULL;
+
 		/* Try to allocate it */
 		block = (AllocBlock) malloc(blksize);
 
@@ -1150,6 +1163,10 @@ AllocSetRealloc(void *pointer, Size size)
 		blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
 		oldblksize = block->endptr - ((char *) block);
 
+		/* Do not exceed maximum allowed memory allocation */
+		if (blksize > oldblksize && exceeds_max_total_bkend_mem(blksize - oldblksize))
+			return NULL;
+
 		block = (AllocBlock) realloc(block, blksize);
 		if (block == NULL)
 		{
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index 34b11392ff..6256e84d48 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -192,6 +192,9 @@ GenerationContextCreate(MemoryContext parent,
 	else
 		allocSize = Max(allocSize, initBlockSize);
 
+	if (exceeds_max_total_bkend_mem(allocSize))
+		return NULL;
+
 	/*
 	 * Allocate the initial block.  Unlike other generation.c blocks, it
 	 * starts with the context header and its block header follows that.
@@ -276,7 +279,7 @@ GenerationReset(MemoryContext context)
 {
 	GenerationContext *set = (GenerationContext *) context;
 	dlist_mutable_iter miter;
-	uint64	deallocation = 0;
+	uint64		deallocation = 0;
 
 	AssertArg(GenerationIsValid(set));
 
@@ -369,6 +372,9 @@ GenerationAlloc(MemoryContext context, Size size)
 	{
 		Size		blksize = required_size + Generation_BLOCKHDRSZ;
 
+		if (exceeds_max_total_bkend_mem(blksize))
+			return NULL;
+
 		block = (GenerationBlock *) malloc(blksize);
 		if (block == NULL)
 			return NULL;
@@ -472,6 +478,9 @@ GenerationAlloc(MemoryContext context, Size size)
 			if (blksize < required_size)
 				blksize = pg_nextpower2_size_t(required_size);
 
+			if (exceeds_max_total_bkend_mem(blksize))
+				return NULL;
+
 			block = (GenerationBlock *) malloc(blksize);
 
 			if (block == NULL)
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index 72376da82e..364c9eb795 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -182,6 +182,10 @@ SlabContextCreate(MemoryContext parent,
 	headerSize += chunksPerBlock * sizeof(bool);
 #endif
 
+	/* Do not exceed maximum allowed memory allocation */
+	if (exceeds_max_total_bkend_mem(headerSize))
+		return NULL;
+
 	slab = (SlabContext *) malloc(headerSize);
 	if (slab == NULL)
 	{
@@ -336,6 +340,10 @@ SlabAlloc(MemoryContext context, Size size)
 	 */
 	if (slab->minFreeChunks == 0)
 	{
+		/* Do not exceed maximum allowed memory allocation */
+		if (exceeds_max_total_bkend_mem(slab->blockSize))
+			return NULL;
+
 		block = (SlabBlock *) malloc(slab->blockSize);
 
 		if (block == NULL)
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 9bdc4197bd..3b940ff98e 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -270,6 +270,7 @@ typedef struct LocalPgBackendStatus
  */
 extern PGDLLIMPORT bool pgstat_track_activities;
 extern PGDLLIMPORT int pgstat_track_activity_query_size;
+extern PGDLLIMPORT int max_total_bkend_mem;
 
 
 /* ----------
@@ -321,6 +322,7 @@ extern int	pgstat_fetch_stat_numbackends(void);
 extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
 extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
 extern char *pgstat_clip_activity(const char *raw_activity);
+extern bool exceeds_max_total_bkend_mem(uint64 allocation_request);
 
 
 #endif							/* BACKEND_STATUS_H */
-- 
2.25.1

