From 43042799b96b588a446c509637b5acf570e2a325 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Sun, 5 Dec 2021 21:42:52 -0800
Subject: [PATCH v6 4/6] Move pgsql_tmp file removal to custodian process.

With this change, startup (and restart after a crash) only stages the
pgsql_tmp directories for deletion by renaming them.  The custodian
process then removes all the files in the staged directories, as well
as the staged directories themselves.  This should help avoid long
startup delays caused by many leftover temporary files.
---
 src/backend/postmaster/custodian.c  | 14 +++++++++++++-
 src/backend/postmaster/postmaster.c | 14 +++++++++-----
 src/backend/storage/file/fd.c       | 21 +++++++++++++++------
 src/include/postmaster/custodian.h  |  3 +++
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/src/backend/postmaster/custodian.c b/src/backend/postmaster/custodian.c
index db00282658..a0ec94ea5c 100644
--- a/src/backend/postmaster/custodian.c
+++ b/src/backend/postmaster/custodian.c
@@ -196,7 +196,19 @@ CustodianMain(void)
 		CustodianShmem->cust_flags = 0;
 		SpinLockRelease(&CustodianShmem->cust_lck);
 
-		/* TODO: offloaded tasks go here */
+		/*
+		 * Remove any pgsql_tmp directories that have been staged for deletion.
+		 * Since pgsql_tmp directories can accumulate many files, removing all
+		 * of the files during startup (which we used to do) can take a very
+		 * long time.  To avoid delaying startup, we simply have startup rename
+		 * the temporary directories, and we clean them up here.
+		 *
+		 * pgsql_tmp directories are not staged or cleaned in single-user mode,
+		 * so we don't need any extra handling outside of the custodian process
+		 * for this.
+		 */
+		if (flags & CUSTODIAN_REMOVE_TEMP_FILES)
+			RemovePgTempFiles(false, false);
 
 		/* Calculate how long to sleep */
 		end_time = (pg_time_t) time(NULL);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 82aa0c6307..b67f8828df 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1401,9 +1401,11 @@ PostmasterMain(int argc, char *argv[])
 	/*
 	 * Remove old temporary files.  At this point there can be no other
 	 * Postgres processes running in this directory, so this should be safe.
+	 *
+	 * Note that this just stages the pgsql_tmp directories for deletion.  The
+	 * custodian process is responsible for actually removing the files.
 	 */
 	RemovePgTempFiles(true, true);
-	RemovePgTempFiles(false, false);
 
 	/*
 	 * Initialize the autovacuum subsystem (again, no process start yet)
@@ -4052,12 +4054,14 @@ PostmasterStateMachine(void)
 		ereport(LOG,
 				(errmsg("all server processes terminated; reinitializing")));
 
-		/* remove leftover temporary files after a crash */
+		/*
+		 * Remove leftover temporary files after a crash.
+		 *
+		 * Note that this just stages the pgsql_tmp directories for deletion.
+		 * The custodian process is responsible for actually removing the files.
+		 */
 		if (remove_temp_files_after_crash)
-		{
 			RemovePgTempFiles(true, true);
-			RemovePgTempFiles(false, false);
-		}
 
 		/* allow background workers to immediately restart */
 		ResetBackgroundWorkerCrashTimes();
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 79ca3a5be9..46dc1925a2 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -97,6 +97,7 @@
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "portability/mem.h"
+#include "postmaster/custodian.h"
 #include "postmaster/startup.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
@@ -1640,9 +1641,9 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
  *
  * Directories created within the top-level temporary directory should begin
  * with PG_TEMP_FILE_PREFIX, so that they can be identified as temporary and
- * deleted at startup by RemovePgTempFiles().  Further subdirectories below
- * that do not need any particular prefix.
-*/
+ * deleted by RemovePgTempFiles().  Further subdirectories below that do not
+ * need any particular prefix.
+ */
 void
 PathNameCreateTemporaryDir(const char *basedir, const char *directory)
 {
@@ -1840,9 +1841,9 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
  *
  * If the file is inside the top-level temporary directory, its name should
  * begin with PG_TEMP_FILE_PREFIX so that it can be identified as temporary
- * and deleted at startup by RemovePgTempFiles().  Alternatively, it can be
- * inside a directory created with PathNameCreateTemporaryDir(), in which case
- * the prefix isn't needed.
+ * and deleted by RemovePgTempFiles().  Alternatively, it can be inside a
+ * directory created with PathNameCreateTemporaryDir(), in which case the prefix
+ * isn't needed.
  */
 File
 PathNameCreateTemporaryFile(const char *path, bool error_on_failure)
@@ -3211,6 +3212,14 @@ RemovePgTempFiles(bool stage, bool remove_relation_files)
 	 * would create a race condition.  It's done separately, earlier in
 	 * postmaster startup.
 	 */
+
+	/*
+	 * When asked to stage pgsql_tmp directories for removal, request that the
+	 * custodian process delete all the files in the staged directories as
+	 * well as the directories themselves.
+	 */
+	if (stage)
+		RequestCustodian(CUSTODIAN_REMOVE_TEMP_FILES);
 }
 
 /*
diff --git a/src/include/postmaster/custodian.h b/src/include/postmaster/custodian.h
index c95a7c7de6..f6dcd9ddef 100644
--- a/src/include/postmaster/custodian.h
+++ b/src/include/postmaster/custodian.h
@@ -17,4 +17,7 @@ extern Size CustodianShmemSize(void);
 extern void CustodianShmemInit(void);
 extern void RequestCustodian(int flags);
 
+/* flags for RequestCustodian() */
+#define CUSTODIAN_REMOVE_TEMP_FILES		0x0001
+
 #endif						/* _CUSTODIAN_H */
-- 
2.25.1

