From 72baf897a3c733b30bb4bf63e63825b9bc6a6acf Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Sun, 5 Dec 2021 21:16:44 -0800
Subject: [PATCH v5 3/8] Split pgsql_tmp cleanup into two stages.

First, pgsql_tmp directories will be renamed to stage them for
removal.  Then, all files in pgsql_tmp are removed before removing
the staged directories themselves.  This change is being made in
preparation for a follow-up change to offload most temporary file
cleanup to the new custodian process.

Note that temporary relation files cannot be cleaned up via the
aforementioned strategy and will not be offloaded to the custodian.
---
 src/backend/postmaster/postmaster.c |   8 +-
 src/backend/storage/file/fd.c       | 176 ++++++++++++++++++++++++----
 src/include/storage/fd.h            |   2 +-
 3 files changed, 162 insertions(+), 24 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 54f77cb306..8248d55e23 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1391,7 +1391,8 @@ PostmasterMain(int argc, char *argv[])
 	 * Remove old temporary files.  At this point there can be no other
 	 * Postgres processes running in this directory, so this should be safe.
 	 */
-	RemovePgTempFiles();
+	RemovePgTempFiles(true, true);
+	RemovePgTempFiles(false, false);
 
 	/*
 	 * Initialize stats collection subsystem (this does NOT start the
@@ -4139,7 +4140,10 @@ PostmasterStateMachine(void)
 
 		/* remove leftover temporary files after a crash */
 		if (remove_temp_files_after_crash)
-			RemovePgTempFiles();
+		{
+			RemovePgTempFiles(true, true);
+			RemovePgTempFiles(false, false);
+		}
 
 		/* allow background workers to immediately restart */
 		ResetBackgroundWorkerCrashTimes();
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 35cb6f7bb6..d3019a4b67 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -112,6 +112,8 @@
 #define PG_FLUSH_DATA_WORKS 1
 #endif
 
+#define PG_TEMP_DIR_TO_REMOVE_PREFIX (PG_TEMP_FILES_DIR "_to_remove_")
+
 /*
  * We must leave some file descriptors free for system(), the dynamic loader,
  * and other code that tries to open files without consulting fd.c.  This
@@ -338,6 +340,8 @@ static void BeforeShmemExit_Files(int code, Datum arg);
 static void CleanupTempFiles(bool isCommit, bool isProcExit);
 static void RemovePgTempRelationFiles(const char *tsdirname);
 static void RemovePgTempRelationFilesInDbspace(const char *dbspacedirname);
+static void StagePgTempDirForRemoval(const char *tmp_dir);
+static void RemoveStagedPgTempDirs(const char *spc_dir);
 
 static void walkdir(const char *path,
 					void (*action) (const char *fname, bool isdir, int elevel),
@@ -3133,24 +3137,20 @@ CleanupTempFiles(bool isCommit, bool isProcExit)
  * Remove temporary and temporary relation files left over from a prior
  * postmaster session
  *
- * This should be called during postmaster startup.  It will forcibly
- * remove any leftover files created by OpenTemporaryFile and any leftover
- * temporary relation files created by mdcreate.
+ * If stage is true, this function will simply rename all pgsql_tmp directories
+ * to stage them for removal at a later time.  If stage is false, this function
+ * will delete all files in the staged directories as well as the directories
+ * themselves.
  *
- * During post-backend-crash restart cycle, this routine is called when
- * remove_temp_files_after_crash GUC is enabled. Multiple crashes while
- * queries are using temp files could result in useless storage usage that can
- * only be reclaimed by a service restart. The argument against enabling it is
- * that someone might want to examine the temporary files for debugging
- * purposes. This does however mean that OpenTemporaryFile had better allow for
- * collision with an existing temp file name.
+ * If remove_relation_files is true, this function will remove the temporary
+ * relation files.  Otherwise, this step is skipped.
  *
  * NOTE: this function and its subroutines generally report syscall failures
  * with ereport(LOG) and keep going.  Removing temp files is not so critical
  * that we should fail to start the database when we can't do it.
  */
 void
-RemovePgTempFiles(void)
+RemovePgTempFiles(bool stage, bool remove_relation_files)
 {
 	char		temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY) + sizeof(PG_TEMP_FILES_DIR)];
 	DIR		   *spc_dir;
@@ -3159,9 +3159,16 @@ RemovePgTempFiles(void)
 	/*
 	 * First process temp files in pg_default ($PGDATA/base)
 	 */
-	snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR);
-	RemovePgTempDir(temp_path, true, false);
-	RemovePgTempRelationFiles("base");
+	if (stage)
+	{
+		snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR);
+		StagePgTempDirForRemoval(temp_path);
+	}
+	else
+		RemoveStagedPgTempDirs("base");
+
+	if (remove_relation_files)
+		RemovePgTempRelationFiles("base");
 
 	/*
 	 * Cycle through temp directories for all non-default tablespaces.
@@ -3174,13 +3181,26 @@ RemovePgTempFiles(void)
 			strcmp(spc_de->d_name, "..") == 0)
 			continue;
 
-		snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s",
-				 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR);
-		RemovePgTempDir(temp_path, true, false);
+		if (stage)
+		{
+			snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s",
+					 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY,
+					 PG_TEMP_FILES_DIR);
+			StagePgTempDirForRemoval(temp_path);
+		}
+		else
+		{
+			snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
+					 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
+			RemoveStagedPgTempDirs(temp_path);
+		}
 
-		snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
-				 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
-		RemovePgTempRelationFiles(temp_path);
+		if (remove_relation_files)
+		{
+			snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
+					 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
+			RemovePgTempRelationFiles(temp_path);
+		}
 	}
 
 	FreeDir(spc_dir);
@@ -3194,7 +3214,121 @@ RemovePgTempFiles(void)
 }
 
 /*
- * Process one pgsql_tmp directory for RemovePgTempFiles.
+ * StagePgTempDirForRemoval
+ *
+ * This function renames the given directory with a special prefix that
+ * RemoveStagedPgTempDirs() will know to look for.  An integer is appended to
+ * the end of the new directory name in case previously staged pgsql_tmp
+ * directories have not yet been removed.
+ */
+static void
+StagePgTempDirForRemoval(const char *tmp_dir)
+{
+	DIR		   *dir;
+	char		stage_path[MAXPGPATH * 2];
+	char		parent_path[MAXPGPATH * 2];
+
+	/*
+	 * If tmp_dir doesn't exist, there is nothing to stage.
+	 */
+	dir = AllocateDir(tmp_dir);
+	if (dir == NULL)
+	{
+		if (errno != ENOENT)
+			ereport(LOG,
+					(errcode_for_file_access(),
+					 errmsg("could not open directory \"%s\": %m", tmp_dir)));
+		return;
+	}
+	FreeDir(dir);
+
+	strlcpy(parent_path, tmp_dir, MAXPGPATH * 2);
+	get_parent_directory(parent_path);
+
+	/*
+	 * get_parent_directory() returns an empty string if the input argument is
+	 * just a file name (see comments in path.c), so handle that as being the
+	 * current directory.
+	 */
+	if (strlen(parent_path) == 0)
+		strlcpy(parent_path, ".", MAXPGPATH * 2);
+
+	/*
+	 * Find a name for the stage directory.  We just increment an integer at the
+	 * end of the name until we find one that doesn't exist.
+	 */
+	for (int n = 0; n <= INT_MAX; n++)
+	{
+		snprintf(stage_path, sizeof(stage_path), "%s/%s%d", parent_path,
+				 PG_TEMP_DIR_TO_REMOVE_PREFIX, n);
+
+		dir = AllocateDir(stage_path);
+		if (dir == NULL)
+		{
+			if (errno == ENOENT)
+				break;
+
+			ereport(LOG,
+					(errcode_for_file_access(),
+					 errmsg("could not open directory \"%s\": %m",
+							stage_path)));
+			return;
+		}
+		FreeDir(dir);
+
+		stage_path[0] = '\0';
+	}
+
+	/*
+	 * In the unlikely event that we couldn't find a name for the stage
+	 * directory, bail out.
+	 */
+	if (stage_path[0] == '\0')
+	{
+		ereport(LOG,
+				(errmsg("could not stage \"%s\" for deletion",
+						tmp_dir)));
+		return;
+	}
+
+	/*
+	 * Rename the temporary directory.
+	 */
+	if (rename(tmp_dir, stage_path) != 0)
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not rename directory \"%s\" to \"%s\": %m",
+						tmp_dir, stage_path)));
+}
+
+/*
+ * RemoveStagedPgTempDirs
+ *
+ * This function removes all pgsql_tmp directories that have been staged for
+ * removal by StagePgTempDirForRemoval() in the given tablespace directory.
+ */
+static void
+RemoveStagedPgTempDirs(const char *spc_dir)
+{
+	char		temp_path[MAXPGPATH * 2];
+	DIR		   *dir;
+	struct dirent *de;
+
+	dir = AllocateDir(spc_dir);
+	while ((de = ReadDirExtended(dir, spc_dir, LOG)) != NULL)
+	{
+		if (strncmp(de->d_name, PG_TEMP_DIR_TO_REMOVE_PREFIX,
+					strlen(PG_TEMP_DIR_TO_REMOVE_PREFIX)) != 0)
+			continue;
+
+		snprintf(temp_path, sizeof(temp_path), "%s/%s", spc_dir, de->d_name);
+		RemovePgTempDir(temp_path, true, false);
+	}
+	FreeDir(dir);
+}
+
+/*
+ * Process one pgsql_tmp directory for RemoveStagedPgTempDirs.
  *
  * If missing_ok is true, it's all right for the named directory to not exist.
  * Any other problem results in a LOG message.  (missing_ok should be true at
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 525847daea..240992ca51 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -168,7 +168,7 @@ extern Oid	GetNextTempTableSpace(void);
 extern void AtEOXact_Files(bool isCommit);
 extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
 							  SubTransactionId parentSubid);
-extern void RemovePgTempFiles(void);
+extern void RemovePgTempFiles(bool stage, bool remove_relation_files);
 extern void RemovePgTempDir(const char *tmpdirname, bool missing_ok,
 							bool unlink_all);
 extern bool looks_like_temp_rel_name(const char *name);
-- 
2.25.1

