From 7fecc9c9dc8a0ebbfbb1828a8410dac1be1ce7f5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathandbossart@gmail.com>
Date: Fri, 23 Dec 2022 16:35:25 -0800
Subject: [PATCH v4 1/3] Move the code to restore files via the shell to a
 separate file.

This is preparatory work for allowing more extensibility in this
area.
---
 src/backend/access/transam/Makefile        |   1 +
 src/backend/access/transam/meson.build     |   1 +
 src/backend/access/transam/shell_restore.c | 194 +++++++++++++++++++++
 src/backend/access/transam/xlog.c          |  44 ++++-
 src/backend/access/transam/xlogarchive.c   | 158 +----------------
 src/include/access/xlogarchive.h           |   7 +-
 6 files changed, 240 insertions(+), 165 deletions(-)
 create mode 100644 src/backend/access/transam/shell_restore.c

diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 661c55a9db..099c315d03 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -19,6 +19,7 @@ OBJS = \
 	multixact.o \
 	parallel.o \
 	rmgr.o \
+	shell_restore.o \
 	slru.o \
 	subtrans.o \
 	timeline.o \
diff --git a/src/backend/access/transam/meson.build b/src/backend/access/transam/meson.build
index 8920c1bfce..3031c2f6cf 100644
--- a/src/backend/access/transam/meson.build
+++ b/src/backend/access/transam/meson.build
@@ -7,6 +7,7 @@ backend_sources += files(
   'multixact.c',
   'parallel.c',
   'rmgr.c',
+  'shell_restore.c',
   'slru.c',
   'subtrans.c',
   'timeline.c',
diff --git a/src/backend/access/transam/shell_restore.c b/src/backend/access/transam/shell_restore.c
new file mode 100644
index 0000000000..3ddcabd969
--- /dev/null
+++ b/src/backend/access/transam/shell_restore.c
@@ -0,0 +1,194 @@
+/*-------------------------------------------------------------------------
+ *
+ * shell_restore.c
+ *
+ * These recovery functions use a user-specified shell command (e.g., the
+ * restore_command GUC).
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/backend/access/transam/shell_restore.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+
+#include "access/xlogarchive.h"
+#include "access/xlogrecovery.h"
+#include "common/archive.h"
+#include "storage/ipc.h"
+#include "utils/wait_event.h"
+
+static void ExecuteRecoveryCommand(const char *command,
+								   const char *commandName, bool failOnSignal,
+								   uint32 wait_event_info,
+								   const char *lastRestartPointFileName);
+
+bool
+shell_restore(const char *file, const char *path,
+			  const char *lastRestartPointFileName)
+{
+	char	   *cmd;
+	int			rc;
+
+	/* Build the restore command to execute */
+	cmd = BuildRestoreCommand(recoveryRestoreCommand, path, file,
+							  lastRestartPointFileName);
+	if (cmd == NULL)
+		elog(ERROR, "could not build restore command \"%s\"", cmd);
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing restore command \"%s\"", cmd)));
+
+	/*
+	 * Copy xlog from archival storage to XLOGDIR
+	 */
+	fflush(NULL);
+	pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
+	rc = system(cmd);
+	pgstat_report_wait_end();
+
+	pfree(cmd);
+
+	/*
+	 * Remember, we rollforward UNTIL the restore fails so failure here is
+	 * just part of the process... that makes it difficult to determine
+	 * whether the restore failed because there isn't an archive to restore,
+	 * or because the administrator has specified the restore program
+	 * incorrectly.  We have to assume the former.
+	 *
+	 * However, if the failure was due to any sort of signal, it's best to
+	 * punt and abort recovery.  (If we "return false" here, upper levels will
+	 * assume that recovery is complete and start up the database!) It's
+	 * essential to abort on child SIGINT and SIGQUIT, because per spec
+	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+	 * those it's a good bet we should have gotten it too.
+	 *
+	 * On SIGTERM, assume we have received a fast shutdown request, and exit
+	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+	 * child process. If we receive it first, the signal handler will call
+	 * proc_exit, otherwise we do it here. If we or the child process received
+	 * SIGTERM for any other reason than a fast shutdown request, postmaster
+	 * will perform an immediate shutdown when it sees us exiting
+	 * unexpectedly.
+	 *
+	 * We treat hard shell errors such as "command not found" as fatal, too.
+	 */
+	if (wait_result_is_signal(rc, SIGTERM))
+		proc_exit(1);
+
+	ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
+			(errmsg("could not restore file \"%s\" from archive: %s",
+					file, wait_result_to_str(rc))));
+
+	return (rc == 0);
+}
+
+void
+shell_archive_cleanup(const char *lastRestartPointFileName)
+{
+	ExecuteRecoveryCommand(archiveCleanupCommand, "archive_cleanup_command",
+						   false, WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
+						   lastRestartPointFileName);
+}
+
+void
+shell_recovery_end(const char *lastRestartPointFileName)
+{
+	ExecuteRecoveryCommand(recoveryEndCommand, "recovery_end_command", true,
+						   WAIT_EVENT_RECOVERY_END_COMMAND,
+						   lastRestartPointFileName);
+}
+
+/*
+ * Attempt to execute an external shell command during recovery.
+ *
+ * 'command' is the shell command to be executed, 'commandName' is a
+ * human-readable name describing the command emitted in the logs. If
+ * 'failOnSignal' is true and the command is killed by a signal, a FATAL
+ * error is thrown. Otherwise a WARNING is emitted.
+ *
+ * This is currently used for recovery_end_command and archive_cleanup_command.
+ */
+static void
+ExecuteRecoveryCommand(const char *command, const char *commandName,
+					   bool failOnSignal, uint32 wait_event_info,
+					   const char *lastRestartPointFileName)
+{
+	char		xlogRecoveryCmd[MAXPGPATH];
+	char	   *dp;
+	char	   *endp;
+	const char *sp;
+	int			rc;
+
+	Assert(command && commandName);
+
+	/*
+	 * construct the command to be executed
+	 */
+	dp = xlogRecoveryCmd;
+	endp = xlogRecoveryCmd + MAXPGPATH - 1;
+	*endp = '\0';
+
+	for (sp = command; *sp; sp++)
+	{
+		if (*sp == '%')
+		{
+			switch (sp[1])
+			{
+				case 'r':
+					/* %r: filename of last restartpoint */
+					sp++;
+					strlcpy(dp, lastRestartPointFileName, endp - dp);
+					dp += strlen(dp);
+					break;
+				case '%':
+					/* convert %% to a single % */
+					sp++;
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+				default:
+					/* otherwise treat the % as not special */
+					if (dp < endp)
+						*dp++ = *sp;
+					break;
+			}
+		}
+		else
+		{
+			if (dp < endp)
+				*dp++ = *sp;
+		}
+	}
+	*dp = '\0';
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing %s \"%s\"", commandName, command)));
+
+	/*
+	 * execute the constructed command
+	 */
+	fflush(NULL);
+	pgstat_report_wait_start(wait_event_info);
+	rc = system(xlogRecoveryCmd);
+	pgstat_report_wait_end();
+
+	if (rc != 0)
+	{
+		/*
+		 * If the failure was due to any sort of signal, it's best to punt and
+		 * abort recovery.  See comments in shell_restore().
+		 */
+		ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
+		/*------
+		   translator: First %s represents a postgresql.conf parameter name like
+		  "recovery_end_command", the 2nd is the value of that parameter, the
+		  third an already translated error message. */
+				(errmsg("%s \"%s\": %s", commandName,
+						command, wait_result_to_str(rc))));
+	}
+}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0070d56b0b..fdce12614a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4887,10 +4887,24 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
 	 * Execute the recovery_end_command, if any.
 	 */
 	if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
-		ExecuteRecoveryCommand(recoveryEndCommand,
-							   "recovery_end_command",
-							   true,
-							   WAIT_EVENT_RECOVERY_END_COMMAND);
+	{
+		char		lastRestartPointFname[MAXPGPATH];
+		XLogSegNo	restartSegNo;
+		XLogRecPtr	restartRedoPtr;
+		TimeLineID	restartTli;
+
+		/*
+		 * Calculate the archive file cutoff point for use during log shipping
+		 * replication. All files earlier than this point can be deleted from
+		 * the archive, though there is no requirement to do so.
+		 */
+		GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+		XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
+		XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
+					 wal_segment_size);
+
+		shell_recovery_end(lastRestartPointFname);
+	}
 
 	/*
 	 * We switched to a new timeline. Clean up segments on the old timeline.
@@ -7307,10 +7321,24 @@ CreateRestartPoint(int flags)
 	 * Finally, execute archive_cleanup_command, if any.
 	 */
 	if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
-		ExecuteRecoveryCommand(archiveCleanupCommand,
-							   "archive_cleanup_command",
-							   false,
-							   WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
+	{
+		char		lastRestartPointFname[MAXPGPATH];
+		XLogSegNo	restartSegNo;
+		XLogRecPtr	restartRedoPtr;
+		TimeLineID	restartTli;
+
+		/*
+		 * Calculate the archive file cutoff point for use during log shipping
+		 * replication. All files earlier than this point can be deleted from
+		 * the archive, though there is no requirement to do so.
+		 */
+		GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+		XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
+		XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
+					 wal_segment_size);
+
+		shell_archive_cleanup(lastRestartPointFname);
+	}
 
 	return true;
 }
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 76abc74c67..b5cb060d55 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -56,9 +56,8 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 					bool cleanupEnabled)
 {
 	char		xlogpath[MAXPGPATH];
-	char	   *xlogRestoreCmd;
 	char		lastRestartPointFname[MAXPGPATH];
-	int			rc;
+	bool		ret;
 	struct stat stat_buf;
 	XLogSegNo	restartSegNo;
 	XLogRecPtr	restartRedoPtr;
@@ -149,18 +148,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	else
 		XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size);
 
-	/* Build the restore command to execute */
-	xlogRestoreCmd = BuildRestoreCommand(recoveryRestoreCommand,
-										 xlogpath, xlogfname,
-										 lastRestartPointFname);
-	if (xlogRestoreCmd == NULL)
-		elog(ERROR, "could not build restore command \"%s\"",
-			 recoveryRestoreCommand);
-
-	ereport(DEBUG3,
-			(errmsg_internal("executing restore command \"%s\"",
-							 xlogRestoreCmd)));
-
 	/*
 	 * Check signals before restore command and reset afterwards.
 	 */
@@ -169,15 +156,11 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	/*
 	 * Copy xlog from archival storage to XLOGDIR
 	 */
-	fflush(NULL);
-	pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
-	rc = system(xlogRestoreCmd);
-	pgstat_report_wait_end();
+	ret = shell_restore(xlogfname, xlogpath, lastRestartPointFname);
 
 	PostRestoreCommand();
-	pfree(xlogRestoreCmd);
 
-	if (rc == 0)
+	if (ret)
 	{
 		/*
 		 * command apparently succeeded, but let's make sure the file is
@@ -233,37 +216,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 		}
 	}
 
-	/*
-	 * Remember, we rollforward UNTIL the restore fails so failure here is
-	 * just part of the process... that makes it difficult to determine
-	 * whether the restore failed because there isn't an archive to restore,
-	 * or because the administrator has specified the restore program
-	 * incorrectly.  We have to assume the former.
-	 *
-	 * However, if the failure was due to any sort of signal, it's best to
-	 * punt and abort recovery.  (If we "return false" here, upper levels will
-	 * assume that recovery is complete and start up the database!) It's
-	 * essential to abort on child SIGINT and SIGQUIT, because per spec
-	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-	 * those it's a good bet we should have gotten it too.
-	 *
-	 * On SIGTERM, assume we have received a fast shutdown request, and exit
-	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
-	 * child process. If we receive it first, the signal handler will call
-	 * proc_exit, otherwise we do it here. If we or the child process received
-	 * SIGTERM for any other reason than a fast shutdown request, postmaster
-	 * will perform an immediate shutdown when it sees us exiting
-	 * unexpectedly.
-	 *
-	 * We treat hard shell errors such as "command not found" as fatal, too.
-	 */
-	if (wait_result_is_signal(rc, SIGTERM))
-		proc_exit(1);
-
-	ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
-			(errmsg("could not restore file \"%s\" from archive: %s",
-					xlogfname, wait_result_to_str(rc))));
-
 not_available:
 
 	/*
@@ -277,110 +229,6 @@ not_available:
 	return false;
 }
 
-/*
- * Attempt to execute an external shell command during recovery.
- *
- * 'command' is the shell command to be executed, 'commandName' is a
- * human-readable name describing the command emitted in the logs. If
- * 'failOnSignal' is true and the command is killed by a signal, a FATAL
- * error is thrown. Otherwise a WARNING is emitted.
- *
- * This is currently used for recovery_end_command and archive_cleanup_command.
- */
-void
-ExecuteRecoveryCommand(const char *command, const char *commandName,
-					   bool failOnSignal, uint32 wait_event_info)
-{
-	char		xlogRecoveryCmd[MAXPGPATH];
-	char		lastRestartPointFname[MAXPGPATH];
-	char	   *dp;
-	char	   *endp;
-	const char *sp;
-	int			rc;
-	XLogSegNo	restartSegNo;
-	XLogRecPtr	restartRedoPtr;
-	TimeLineID	restartTli;
-
-	Assert(command && commandName);
-
-	/*
-	 * Calculate the archive file cutoff point for use during log shipping
-	 * replication. All files earlier than this point can be deleted from the
-	 * archive, though there is no requirement to do so.
-	 */
-	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
-	XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
-	XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
-				 wal_segment_size);
-
-	/*
-	 * construct the command to be executed
-	 */
-	dp = xlogRecoveryCmd;
-	endp = xlogRecoveryCmd + MAXPGPATH - 1;
-	*endp = '\0';
-
-	for (sp = command; *sp; sp++)
-	{
-		if (*sp == '%')
-		{
-			switch (sp[1])
-			{
-				case 'r':
-					/* %r: filename of last restartpoint */
-					sp++;
-					strlcpy(dp, lastRestartPointFname, endp - dp);
-					dp += strlen(dp);
-					break;
-				case '%':
-					/* convert %% to a single % */
-					sp++;
-					if (dp < endp)
-						*dp++ = *sp;
-					break;
-				default:
-					/* otherwise treat the % as not special */
-					if (dp < endp)
-						*dp++ = *sp;
-					break;
-			}
-		}
-		else
-		{
-			if (dp < endp)
-				*dp++ = *sp;
-		}
-	}
-	*dp = '\0';
-
-	ereport(DEBUG3,
-			(errmsg_internal("executing %s \"%s\"", commandName, command)));
-
-	/*
-	 * execute the constructed command
-	 */
-	fflush(NULL);
-	pgstat_report_wait_start(wait_event_info);
-	rc = system(xlogRecoveryCmd);
-	pgstat_report_wait_end();
-
-	if (rc != 0)
-	{
-		/*
-		 * If the failure was due to any sort of signal, it's best to punt and
-		 * abort recovery.  See comments in RestoreArchivedFile().
-		 */
-		ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
-		/*------
-		   translator: First %s represents a postgresql.conf parameter name like
-		  "recovery_end_command", the 2nd is the value of that parameter, the
-		  third an already translated error message. */
-				(errmsg("%s \"%s\": %s", commandName,
-						command, wait_result_to_str(rc))));
-	}
-}
-
-
 /*
  * A file was restored from the archive under a temporary filename (path),
  * and now we want to keep it. Rename it under the permanent filename in
diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h
index 31ff206034..299304703e 100644
--- a/src/include/access/xlogarchive.h
+++ b/src/include/access/xlogarchive.h
@@ -20,8 +20,6 @@
 extern bool RestoreArchivedFile(char *path, const char *xlogfname,
 								const char *recovername, off_t expectedSize,
 								bool cleanupEnabled);
-extern void ExecuteRecoveryCommand(const char *command, const char *commandName,
-								   bool failOnSignal, uint32 wait_event_info);
 extern void KeepFileRestoredFromArchive(const char *path, const char *xlogfname);
 extern void XLogArchiveNotify(const char *xlog);
 extern void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli);
@@ -32,4 +30,9 @@ extern bool XLogArchiveIsReady(const char *xlog);
 extern bool XLogArchiveIsReadyOrDone(const char *xlog);
 extern void XLogArchiveCleanup(const char *xlog);
 
+extern bool shell_restore(const char *file, const char *path,
+						  const char *lastRestartPointFileName);
+extern void shell_archive_cleanup(const char *lastRestartPointFileName);
+extern void shell_recovery_end(const char *lastRestartPointFileName);
+
 #endif							/* XLOG_ARCHIVE_H */
-- 
2.25.1

