From b114661610387a1dda37d992b16349b49556dbd2 Mon Sep 17 00:00:00 2001
From: Mats Kindahl <mats@timescale.com>
Date: Wed, 14 Dec 2022 13:51:40 +0100
Subject: Use memory context for backend startup objects

There is no check for the return value of several allocations using
malloc(), calloc(), and strdup() in backend startup which means that it
is possible to get a segmentation fault when starting a backend if the
system is low on memory.

To handle backend startup, this commit create a dedicated memory
context BackendStartupContext for the backend startup and allocate
objects that need to be available during backend start there.  It then
replaces these memory allocations with corresponding palloc(),
pstrdup(), and friends.

Most of the startup code already uses PostmasterContext to allocate
memory, but some locations where missing and this commit adds
allocation in the PostmasterContext rather than allocating memory
using malloc() and friends.

As a result, this will allocate memory that is only needed in the
postmaster in PostmasterContext, and memory that is used in the backend
in BackendStartupContext.

Since the postmaster should not exit if a memory allocation failure
occurs when trying to start a backend, and palloc() and friends
generate errors on failing to allocate memory, it will capture any
errors occuring when starting a backend and just continue running.

Since errors will automatically abort the backend start after this
change, some log-level messages are replaced with error-level messages
indicating that there were an error in starting the backend.

Fatal errors when starting the backend will still abort the postmaster.
---
 PATCH.md                            |  96 ++++++++++++++++
 src/backend/postmaster/postmaster.c | 172 ++++++++++++++++++----------
 src/backend/tcop/postgres.c         |   6 +-
 src/backend/utils/mmgr/README       |   6 +
 src/backend/utils/mmgr/mcxt.c       |   7 +-
 src/include/utils/memutils.h        |   1 +
 6 files changed, 223 insertions(+), 65 deletions(-)
 create mode 100644 PATCH.md

diff --git a/PATCH.md b/PATCH.md
new file mode 100644
index 0000000000..25e3e0e781
--- /dev/null
+++ b/PATCH.md
@@ -0,0 +1,96 @@
+# Notes about the patch
+
+This contains a few notes about the patch, in particular what was
+tested manually.
+
+## Manually executed tests
+
+The following tests were executed manually to check what happens when
+starting a backend and failing the memory allocation at various
+places.
+
+The testing was done by starting a debugger, setting a breakpoint in
+`BackendStartup` and `BackendInitialize` and setting the result of the
+returned memory to NULL at suitable points.
+
+### Failing to allocate memory when creating socket structure
+
+When failing to allocate memory in for the port structure, after
+having accepted the connection;
+
+```c
+					if (status == STATUS_OK) {
+						Port *bport;
+						BackendStartupContext = AllocSetContextCreate(TopMemoryContext,
+																  "BackendStartup",
+																  ALLOCSET_SMALL_SIZES);
+						oldcontext = MemoryContextSwitchTo(BackendStartupContext);
+>>>						bport = palloc(sizeof(*bport));
+						memcpy(bport, &tmp_port, sizeof(*bport));
+						BackendStartup(bport);
+					}
+```
+
+We get this error.
+
+```
+$ psql
+psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: server closed the connection unexpectedly
+        This probably means the server terminated abnormally
+        before or while processing the request.
+```
+
+It would be nice to get a better error message on this failure.
+
+
+### Failing to allocate memory in postmaster process
+
+When allocating memory in the postmaster process, We expect the
+connection to be denied and the postmaster to keep on running.
+
+When setting `bn` to null in `BackendStartup` after the `malloc` here
+
+```c
+	bn = malloc(sizeof(Backend));
+	if (!bn)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+```
+
+I get this result when trying to start a backend.
+
+```
+mats@fury:~/work/mkindahl/postgres$ psql
+psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: server closed the connection unexpectedly
+        This probably means the server terminated abnormally
+        before or while processing the request.
+```
+
+It would be nice to get a better error message on this failure.
+
+### Allocating memory in backend process
+
+Second case is allocating memory in the backend and failing. When
+setting `port->remote_host` to NULL in `BackendInitialize` in this
+code:
+
+```c
+	/*
+	 * Save remote_host and remote_port in port structure (after this, they
+	 * will appear in log_line_prefix data for log messages).
+	 */
+	port->remote_host = pstrdup(remote_host);
+	port->remote_port = pstrdup(remote_port);
+```
+
+The following error is printed:
+
+```
+mats@fury:~/work/mkindahl/postgres$ psql
+psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: FATAL:  out of memory
+DETAIL:  Failed on request of size 8 in memory context "BackendStartup".
+```
+
+This looks good. A FATAL level message will be sent to the client
+telling it what happened.
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 2552327d90..52019c189c 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -399,7 +399,6 @@ static void unlink_external_pid_file(int status, Datum arg);
 static void getInstallationPaths(const char *argv0);
 static void checkControlFile(void);
 static Port *ConnCreate(int serverFd);
-static void ConnFree(Port *port);
 static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
 static void handle_pm_child_exit_signal(SIGNAL_ARGS);
 static void handle_pm_reload_request_signal(SIGNAL_ARGS);
@@ -703,7 +702,7 @@ PostmasterMain(int argc, char *argv[])
 				break;
 
 			case 'C':
-				output_config_variable = strdup(optarg);
+				output_config_variable = pstrdup(optarg);
 				break;
 
 			case 'c':
@@ -734,7 +733,7 @@ PostmasterMain(int argc, char *argv[])
 				}
 
 			case 'D':
-				userDoption = strdup(optarg);
+				userDoption = pstrdup(optarg);
 				break;
 
 			case 'd':
@@ -1771,19 +1770,117 @@ ServerLoop(void)
 
 			if (events[i].events & WL_SOCKET_ACCEPT)
 			{
-				Port	   *port;
+				/*
+				 * We need to use volatile qualifier for both these variables
+				 * to ensure that they are both stored and read from memory in
+				 * the event that there is an error reported.
+				 *
+				 * Note that the Port structure is allocated on the stack to
+				 * avoid triggering an error when we're low on memory. Use use
+				 * PGINVALID_SOCKET to signal that the socket was not accepted
+				 * so that we can close the socket on error.
+				 */
+				volatile Port port = {PGINVALID_SOCKET};
+				volatile MemoryContext oldcontext = NULL;
 
-				port = ConnCreate(events[i].fd);
-				if (port)
+				/*
+				 * We wrap this in PG_TRY since if any errors occur when
+				 * trying to spawn the backend, we do not want to abort the
+				 * postmaster and rather report the error and continue
+				 * running.
+				 *
+				 * Note that we can reach the catch statement either in the
+				 * postmaster or in the started backend, so we have to handle
+				 * both cases.
+				 */
+				PG_TRY();
 				{
-					BackendStartup(port);
+					int status;
+					Port tmp_port = {PGINVALID_SOCKET};
 
 					/*
-					 * We no longer need the open socket or port structure in
-					 * this process
+					 * We need to first accept the connection regardless of
+					 * whether we can start the backend or not. This is
+					 * necessary to be able to send failure message to the
+					 * connecting client even if we have problems with the
+					 * backend.
+					 *
+					 * Note that StreamConnection does not throw errors nor
+					 * allocate memory, since this can fail.
+					 */
+					status = StreamConnection(events[i].fd, &tmp_port);
+
+					/*
+					 * We can now copy the port data to volatile memory, so
+					 * that we can close the socket if an error is generated
+					 * by BackendStartup().
+					 */
+					port = tmp_port;
+
+					/*
+					 * If everything is fine this far, we set up the backend
+					 * context, allocate the port structure in this memory,
+					 * and start the backend.
+					 */
+					if (status == STATUS_OK) {
+						Port *bport;
+						BackendStartupContext = AllocSetContextCreate(TopMemoryContext,
+																  "BackendStartup",
+																  ALLOCSET_SMALL_SIZES);
+						oldcontext = MemoryContextSwitchTo(BackendStartupContext);
+						bport = palloc(sizeof(*bport));
+						memcpy(bport, &tmp_port, sizeof(*bport));
+						BackendStartup(bport);
+					}
+				}
+				PG_CATCH();
+				{
+					/*
+					 * Note that fatal errors will not be caught by this
+					 * catch, so do not throw them in the postmaster unless
+					 * you want to shut it down.
+					 *
+					 * In the child process we can reach this catch since the
+					 * backend startup might generate errors. In this case, we
+					 * should abort the process and provide a useful message
+					 * to the connected client, so we change the error message
+					 * to FATAL and re-throw it.
 					 */
-					StreamClose(port->sock);
-					ConnFree(port);
+					if (IsUnderPostmaster) {
+						ErrorData  *edata;
+
+						MemoryContextSwitchTo(oldcontext);
+						edata = CopyErrorData();
+						FlushErrorState();
+						edata->elevel = FATAL;
+						ReThrowError(edata);
+					}
+				}
+				PG_END_TRY();
+
+				/*
+				 * We no longer need the open socket in this process,
+				 * regardless of whether the backend startup failed or not. We
+				 * do not reach this point if we are in the backend. See
+				 * above.
+				 *
+				 * Also note that we can reach this part due to a failure in
+				 * BackendStartup, so we need to check that 'port' is
+				 * non-NULL.
+				 */
+				Assert(!IsUnderPostmaster);
+				if (port.sock != PGINVALID_SOCKET)
+					StreamClose(port.sock);
+
+				/*
+				 * We can now release the backend startup context, including
+				 * the port data, since we're done with it in the postmaster.
+				 */
+				if (oldcontext)
+				{
+					MemoryContextSwitchTo(oldcontext);
+					MemoryContextDelete(BackendStartupContext);
+					BackendStartupContext = NULL;
 				}
 			}
 		}
@@ -2517,49 +2614,6 @@ canAcceptConnections(int backend_type)
 }
 
 
-/*
- * ConnCreate -- create a local connection data structure
- *
- * Returns NULL on failure, other than out-of-memory which is fatal.
- */
-static Port *
-ConnCreate(int serverFd)
-{
-	Port	   *port;
-
-	if (!(port = (Port *) calloc(1, sizeof(Port))))
-	{
-		ereport(LOG,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("out of memory")));
-		ExitPostmaster(1);
-	}
-
-	if (StreamConnection(serverFd, port) != STATUS_OK)
-	{
-		if (port->sock != PGINVALID_SOCKET)
-			StreamClose(port->sock);
-		ConnFree(port);
-		return NULL;
-	}
-
-	return port;
-}
-
-
-/*
- * ConnFree -- free a local connection data structure
- *
- * Caller has already closed the socket if any, so there's not much
- * to do here.
- */
-static void
-ConnFree(Port *port)
-{
-	free(port);
-}
-
-
 /*
  * ClosePostmasterPorts -- close all the postmaster's open sockets
  *
@@ -4122,7 +4176,7 @@ BackendStartup(Port *port)
 	bn = (Backend *) malloc(sizeof(Backend));
 	if (!bn)
 	{
-		ereport(LOG,
+		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
 		return STATUS_ERROR;
@@ -4136,7 +4190,7 @@ BackendStartup(Port *port)
 	if (!RandomCancelKey(&MyCancelKey))
 	{
 		free(bn);
-		ereport(LOG,
+		ereport(ERROR,
 				(errcode(ERRCODE_INTERNAL_ERROR),
 				 errmsg("could not generate random cancel key")));
 		return STATUS_ERROR;
@@ -4340,8 +4394,8 @@ BackendInitialize(Port *port)
 	 * Save remote_host and remote_port in port structure (after this, they
 	 * will appear in log_line_prefix data for log messages).
 	 */
-	port->remote_host = strdup(remote_host);
-	port->remote_port = strdup(remote_port);
+	port->remote_host = pstrdup(remote_host);
+	port->remote_port = pstrdup(remote_port);
 
 	/* And now we can issue the Log_connections message, if wanted */
 	if (Log_connections)
@@ -4372,7 +4426,7 @@ BackendInitialize(Port *port)
 		ret == 0 &&
 		strspn(remote_host, "0123456789.") < strlen(remote_host) &&
 		strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
-		port->remote_hostname = strdup(remote_host);
+		port->remote_hostname = pstrdup(remote_host);
 
 	/*
 	 * Ready to begin client interaction.  We will give up and _exit(1) after
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5d439f2710..5f89a4de3b 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4147,9 +4147,9 @@ PostgresMain(const char *dbname, const char *username)
 	/*
 	 * If the PostmasterContext is still around, recycle the space; we don't
 	 * need it anymore after InitPostgres completes.  Note this does not trash
-	 * *MyProcPort, because ConnCreate() allocated that space with malloc()
-	 * ... else we'd need to copy the Port data first.  Also, subsidiary data
-	 * such as the username isn't lost either; see ProcessStartupPacket().
+	 * *MyProcPort, because ConnCreate() allocated that space in the
+	 * BackendStartupContext.  Also, subsidiary data such as the username
+	 * isn't lost either; see ProcessStartupPacket().
 	 */
 	if (PostmasterContext)
 	{
diff --git a/src/backend/utils/mmgr/README b/src/backend/utils/mmgr/README
index b20b9d4852..b0c3c72f8b 100644
--- a/src/backend/utils/mmgr/README
+++ b/src/backend/utils/mmgr/README
@@ -201,6 +201,12 @@ processes; so backends can't delete PostmasterContext until that's done.
 ErrorContext --- the remaining top-level contexts are set up in each
 backend during startup.)
 
+BackendStartupContext --- this is the context allocated for backend
+startup. This context is available because the PostmasterContext is
+deleted after the backend has started, but there is some information
+that needs to remain while the backend is running, for example
+information about the port.
+
 CacheMemoryContext --- permanent storage for relcache, catcache, and
 related modules.  This will never be reset or deleted, either, so it's
 not truly necessary to distinguish it from TopMemoryContext.  But it
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 0b00802df7..91f14efbce 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -141,6 +141,7 @@ MemoryContext CurrentMemoryContext = NULL;
 MemoryContext TopMemoryContext = NULL;
 MemoryContext ErrorContext = NULL;
 MemoryContext PostmasterContext = NULL;
+MemoryContext BackendStartupContext = NULL;
 MemoryContext CacheMemoryContext = NULL;
 MemoryContext MessageContext = NULL;
 MemoryContext TopTransactionContext = NULL;
@@ -718,9 +719,9 @@ MemoryContextStatsDetail(MemoryContext context, int max_children,
 		 *
 		 * We don't buffer the information about all memory contexts in a
 		 * backend into StringInfo and log it as one message.  That would
-		 * require the buffer to be enlarged, risking an OOM as there could
-		 * be a large number of memory contexts in a backend.  Instead, we
-		 * log one message per memory context.
+		 * require the buffer to be enlarged, risking an OOM as there could be
+		 * a large number of memory contexts in a backend.  Instead, we log
+		 * one message per memory context.
 		 */
 		ereport(LOG_SERVER_ONLY,
 				(errhidestmt(true),
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 21640d62a6..5d8619830d 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -58,6 +58,7 @@
 extern PGDLLIMPORT MemoryContext TopMemoryContext;
 extern PGDLLIMPORT MemoryContext ErrorContext;
 extern PGDLLIMPORT MemoryContext PostmasterContext;
+extern PGDLLIMPORT MemoryContext BackendStartupContext;
 extern PGDLLIMPORT MemoryContext CacheMemoryContext;
 extern PGDLLIMPORT MemoryContext MessageContext;
 extern PGDLLIMPORT MemoryContext TopTransactionContext;
-- 
2.34.1

