From 292293e671f713bbb88c3ebf69a1bb61c4200392 Mon Sep 17 00:00:00 2001
From: Mats Kindahl <mats@timescale.com>
Date: Thu, 8 Dec 2022 10:00:23 +0100
Subject: Check result of memory allocations at startup

There are no checks for the return values of several allocations using
malloc(), calloc(), and strdup() in backend startup which means that
it is possible to get a segmentation fault when starting a backend if
the system is low on memory. It makes more sense to abort the spawning
of a backend if we're low on memory rather than tearing down the
entire server.

This commit fixes this by checking the return values of memory
allocation functions and abort with a fatal error if the memory
allocation fails.

Since the postmaster should not exit if a memory allocation failure
occurs when trying to start a backend, it will capture any errors
occuring when starting a backend and just continue running.

Fatal errors when starting the backend will still abort the postmaster,
but fatal errors in the backend will just abort starting the backend
and send an error message back to the client.
---
 PATCH.md                            | 70 +++++++++++++++++++++++
 src/backend/catalog/aclchk.c        |  4 +-
 src/backend/postmaster/postmaster.c | 86 ++++++++++++++++++++---------
 3 files changed, 131 insertions(+), 29 deletions(-)
 create mode 100644 PATCH.md

diff --git a/PATCH.md b/PATCH.md
new file mode 100644
index 0000000000..5ccd3979c8
--- /dev/null
+++ b/PATCH.md
@@ -0,0 +1,70 @@
+# Notes about the patch
+
+This contains a few notes about the patch, in particular what was
+tested manually.
+
+## Manually executed tests
+
+The following tests were executed manually to check what happens when
+starting a backend and failing the memory allocation at various
+places.
+
+### Allocating memory in postmaster process
+
+First case is when allocating memory in the postmaster process. We
+expect the connection to be denied and the postmaster to keep on
+running.
+
+When setting `bn` to null in `BackendStartup` after the `malloc` here
+
+```c
+	bn = malloc(sizeof(Backend));
+	if (!bn)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+```
+
+I get this result when trying to start a backend.
+
+```
+mats@fury:~/work/mkindahl/postgres$ psql
+psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: server closed the connection unexpectedly
+        This probably means the server terminated abnormally
+        before or while processing the request.
+```
+
+It would be nice to get a better error message, basically using the
+error message and send it to the client.
+
+### Allocating memory in backend process
+
+Second case is allocating memory in the backend and failing. When
+setting `port->remote_host` to NULL in `BackendInitialize` in this
+code:
+
+```c
+	/*
+	 * Save remote_host and remote_port in port structure (after this, they
+	 * will appear in log_line_prefix data for log messages).
+	 */
+	port->remote_host = strdup(remote_host);
+	if (!port->remote_host)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+	port->remote_port = strdup(remote_port);
+	if (!port->remote_port)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+```
+
+The following error is printed:
+
+```
+mats@fury:~/work/mkindahl/postgres$ psql
+psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: FATAL:  out of memory
+```
+
+This looks good. A FATAL level message will be sent to the client.
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index e48fc1647a..3ee1dc7568 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -3411,8 +3411,8 @@ pg_class_aclmask_ext(Oid table_oid, Oid roleid, AclMode mask,
 		result |= ACL_VACUUM;
 
 	/*
-	 * Check if ACL_ANALYZE is being checked and, if so, and not already set as
-	 * part of the result, then check if the user is a member of the
+	 * Check if ACL_ANALYZE is being checked and, if so, and not already set
+	 * as part of the result, then check if the user is a member of the
 	 * pg_analyze_all_tables role, which allows ANALYZE on all relations.
 	 */
 	if (mask & ACL_ANALYZE &&
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index f459dab360..066e02530f 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -615,8 +615,8 @@ PostmasterMain(int argc, char *argv[])
 	 *
 	 * 1. We tell sigaction() to block all signals for the duration of the
 	 * signal handler.  This is faster than our old approach of
-	 * blocking/unblocking explicitly in the signal handler, and it should also
-	 * prevent excessive stack consumption if signals arrive quickly.
+	 * blocking/unblocking explicitly in the signal handler, and it should
+	 * also prevent excessive stack consumption if signals arrive quickly.
 	 *
 	 * 2. We do not set the SA_RESTART flag.  This is because signals will be
 	 * blocked at all times except when ServerLoop is waiting for something to
@@ -705,6 +705,10 @@ PostmasterMain(int argc, char *argv[])
 
 			case 'C':
 				output_config_variable = strdup(optarg);
+				if (!output_config_variable)
+					ereport(ERROR,
+							(errcode(ERRCODE_OUT_OF_MEMORY),
+							 errmsg("out of memory")));
 				break;
 
 			case 'c':
@@ -736,6 +740,10 @@ PostmasterMain(int argc, char *argv[])
 
 			case 'D':
 				userDoption = strdup(optarg);
+				if (!userDoption)
+					ereport(ERROR,
+							(errcode(ERRCODE_OUT_OF_MEMORY),
+							 errmsg("out of memory")));
 				break;
 
 			case 'd':
@@ -1783,18 +1791,35 @@ ServerLoop(void)
 					break;
 				if (FD_ISSET(ListenSocket[i], &rmask))
 				{
-					Port	   *port;
+					Port	   *port = NULL;
 
-					port = ConnCreate(ListenSocket[i]);
-					if (port)
+					/*
+					 * We wrap this in PG_TRY since if any errors occur when
+					 * trying to spawn the backend, we do not want to abort
+					 * the postmaster and rather report the error and continue
+					 * running.
+					 *
+					 * Note that fatal errors will not be caught, so
+					 * do not throw them unless you want to shut down the
+					 * postmaster.
+					 */
+					PG_TRY();
 					{
-						BackendStartup(port);
+						port = ConnCreate(ListenSocket[i]);
+						if (port)
+							BackendStartup(port);
+					}
+					PG_END_TRY();
 
-						/*
-						 * We no longer need the open socket or port structure
-						 * in this process
-						 */
-						StreamClose(port->sock);
+					/*
+					 * We no longer need the open socket or port structure
+					 * in this process, and definitely not if we failed
+					 * spawning a backend or creating the port structure.
+					 */
+					if (port)
+					{
+						if (port->sock != PGINVALID_SOCKET)
+							StreamClose(port->sock);
 						ConnFree(port);
 					}
 				}
@@ -2561,21 +2586,15 @@ ConnCreate(int serverFd)
 {
 	Port	   *port;
 
-	if (!(port = (Port *) calloc(1, sizeof(Port))))
-	{
-		ereport(LOG,
+	if (!(port = calloc(1, sizeof(Port))))
+		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
-		ExitPostmaster(1);
-	}
 
+
+	/* Caller will deal with freeing anything that needs to be free'd */
 	if (StreamConnection(serverFd, port) != STATUS_OK)
-	{
-		if (port->sock != PGINVALID_SOCKET)
-			StreamClose(port->sock);
-		ConnFree(port);
 		return NULL;
-	}
 
 	return port;
 }
@@ -4054,14 +4073,12 @@ BackendStartup(Port *port)
 	 * Create backend data structure.  Better before the fork() so we can
 	 * handle failure cleanly.
 	 */
-	bn = (Backend *) malloc(sizeof(Backend));
+	bn = malloc(sizeof(Backend));
 	if (!bn)
-	{
-		ereport(LOG,
+		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
-		return STATUS_ERROR;
-	}
+
 
 	/*
 	 * Compute the cancel key that will be assigned to this backend. The
@@ -4276,7 +4293,15 @@ BackendInitialize(Port *port)
 	 * will appear in log_line_prefix data for log messages).
 	 */
 	port->remote_host = strdup(remote_host);
+	if (!port->remote_host)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
 	port->remote_port = strdup(remote_port);
+	if (!port->remote_port)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
 
 	/* And now we can issue the Log_connections message, if wanted */
 	if (Log_connections)
@@ -4307,7 +4332,14 @@ BackendInitialize(Port *port)
 		ret == 0 &&
 		strspn(remote_host, "0123456789.") < strlen(remote_host) &&
 		strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
+	{
 		port->remote_hostname = strdup(remote_host);
+		if (!port->remote_hostname)
+			ereport(FATAL,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+	}
+
 
 	/*
 	 * Ready to begin client interaction.  We will give up and _exit(1) after
@@ -4729,7 +4761,7 @@ retry:
 	 */
 	childinfo = malloc(sizeof(win32_deadchild_waitinfo));
 	if (!childinfo)
-		ereport(FATAL,
+		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
 
-- 
2.34.1

