make pg_ctl more friendly

Started by Crisp Leeabout 2 years ago21 messages
#1Crisp Lee
litianxiang01@gmail.com
1 attachment(s)

Hi hackers:

I got a basebackup using pg_basebackup -R. After that, I created a restore
point named test on primary, and set recovery_target_name to test,
recovery_target_action to shutdown in standby datadir. I got a failure
startup message after 'pg_ctl start -D $standby_datadir'. I think it is
not a failure, and makes users nervous, especially for newbies.

My thought is to generate a recovery.done file if the postmaster receives
exit code 3 from the startup process. When postmaster exits, pg_ctl will
give a more friendly message to users.

Attachments:

0001-Make-pg_ctl-more-friendly-for-users.patchapplication/octet-stream; name=0001-Make-pg_ctl-more-friendly-for-users.patchDownload
From d46275236c8c3c2573d58c9d0214b11f71909f02 Mon Sep 17 00:00:00 2001
From: krisdiano <litianxiang01@gmail.com>
Date: Thu, 2 Nov 2023 13:16:36 +0800
Subject: [PATCH] Make pg_ctl more friendly for users

Utility pg_basebackup could generate a standby datadir. Before startup, setting
recovery_target_action to shutdown. Maybe users get a failure message after startup.

Now, postmaster will generate a recovery.done file if startup process exits with
code 3. Utility pg_ctl check whether the file exists or not.
---
 src/backend/access/transam/xlogrecovery.c |  1 -
 src/backend/postmaster/postmaster.c       | 17 ++++++++++++++++
 src/bin/pg_ctl/pg_ctl.c                   | 24 +++++++++++++++++++++--
 src/include/access/transam.h              |  4 ++++
 src/include/access/xlog.h                 |  2 +-
 src/include/catalog/pg_database.dat       |  9 +++++----
 src/include/utils/wait_event.h            |  2 +-
 src/tools/pgindent/typedefs.list          | 14 ++++++-------
 8 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index c61566666a..412cdf13dd 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -66,7 +66,6 @@
 
 /* Unsupported old recovery command file names (relative to $PGDATA) */
 #define RECOVERY_COMMAND_FILE	"recovery.conf"
-#define RECOVERY_COMMAND_DONE	"recovery.done"
 
 /*
  * GUC support
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 7b6b613c4a..0ca13ee743 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2936,6 +2936,23 @@ process_pm_child_exit(void)
 				Shutdown = Max(Shutdown, SmartShutdown);
 				TerminateChildren(SIGTERM);
 				pmState = PM_WAIT_BACKENDS;
+
+				/*
+				 * This file gives a tip to pg_ctl which could provide a more
+				 * friendly message to users.
+				 */
+				PG_TRY();
+				{
+					FILE	   *fp = AllocateFile(RECOVERY_COMMAND_DONE, PG_BINARY_W);
+
+					FreeFile(fp);
+				}
+				PG_CATCH();
+				{
+					/* Ignore errors, it just best errort */
+				}
+				PG_END_TRY();
+
 				/* PostmasterStateMachine logic does the rest */
 				continue;
 			}
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3b145bd838..d9ab12ab85 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -15,12 +15,14 @@
 #include <signal.h>
 #include <time.h>
 #include <sys/resource.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
 
+#include "access/transam.h"
 #include "catalog/pg_control.h"
 #include "common/controldata_utils.h"
 #include "common/file_perm.h"
@@ -46,6 +48,7 @@ typedef enum
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
 	POSTMASTER_FAILED,
+	POSTMASTER_RECOVERY_SHUTDOWN,
 } WaitPMResult;
 
 typedef enum
@@ -98,6 +101,7 @@ static char version_file[MAXPGPATH];
 static char pid_file[MAXPGPATH];
 static char promote_file[MAXPGPATH];
 static char logrotate_file[MAXPGPATH];
+static char recovery_done_file[MAXPGPATH];
 
 static volatile pid_t postmasterPID = -1;
 
@@ -572,6 +576,17 @@ start_postmaster(void)
 #endif							/* WIN32 */
 }
 
+static bool
+has_recovery_done_file()
+{
+
+	struct stat statbuf;
+	int			errcode;
+
+	errcode = stat(recovery_done_file, &statbuf);
+	return errcode == 0 || errno == EACCES;
+
+}
 
 
 /*
@@ -662,11 +677,11 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+				return has_recovery_done_file() ? POSTMASTER_RECOVERY_SHUTDOWN : POSTMASTER_FAILED;
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+			return has_recovery_done_file() ? POSTMASTER_RECOVERY_SHUTDOWN : POSTMASTER_FAILED;
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -985,6 +1000,10 @@ do_start(void)
 				print_msg(_(" done\n"));
 				print_msg(_("server started\n"));
 				break;
+			case POSTMASTER_RECOVERY_SHUTDOWN:
+				print_msg(_("PITR shutdown\n"));
+				print_msg(_("update configuration for startup again if needed\n"));
+				break;
 			case POSTMASTER_STILL_STARTING:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: server did not start in time\n"),
@@ -2446,6 +2465,7 @@ main(int argc, char **argv)
 		snprintf(postopts_file, MAXPGPATH, "%s/postmaster.opts", pg_data);
 		snprintf(version_file, MAXPGPATH, "%s/PG_VERSION", pg_data);
 		snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
+		snprintf(recovery_done_file, MAXPGPATH, "%s/%s", pg_data, RECOVERY_COMMAND_DONE);
 
 		/*
 		 * Set mask based on PGDATA permissions,
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index f5af6d3055..4ab41a6860 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -257,6 +257,10 @@ typedef struct VariableCacheData
 typedef VariableCacheData *VariableCache;
 
 
+/* This file are generated when PITR shutdown */
+#define RECOVERY_COMMAND_DONE	"recovery.done"
+
+
 /* ----------------
  *		extern declarations
  * ----------------
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index a14126d164..cb8d127d5f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -26,7 +26,7 @@ typedef enum WalSyncMethod
 	WAL_SYNC_METHOD_OPEN,		/* for O_SYNC */
 	WAL_SYNC_METHOD_FSYNC_WRITETHROUGH,
 	WAL_SYNC_METHOD_OPEN_DSYNC	/* for O_DSYNC */
-} WalSyncMethod;
+}			WalSyncMethod;
 extern PGDLLIMPORT int wal_sync_method;
 
 extern PGDLLIMPORT XLogRecPtr ProcLastRecPtr;
diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat
index 8d91e3bf8d..3057f1a230 100644
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -16,9 +16,10 @@
   descr => 'default template for new databases',
   datname => 'template1', encoding => 'ENCODING',
   datlocprovider => 'LOCALE_PROVIDER', datistemplate => 't',
-  datallowconn => 't', dathasloginevt => 'f', datconnlimit => '-1', datfrozenxid => '0',
-  datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE',
-  datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE',
-  daticurules => 'ICU_RULES', datacl => '_null_' },
+  datallowconn => 't', dathasloginevt => 'f', datconnlimit => '-1',
+  datfrozenxid => '0', datminmxid => '1', dattablespace => 'pg_default',
+  datcollate => 'LC_COLLATE', datctype => 'LC_CTYPE',
+  daticulocale => 'ICU_LOCALE', daticurules => 'ICU_RULES',
+  datacl => '_null_' },
 
 ]
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 00f7d620e7..9d89e87195 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -57,7 +57,7 @@ typedef enum
 {
 	WAIT_EVENT_EXTENSION = PG_WAIT_EXTENSION,
 	WAIT_EVENT_EXTENSION_FIRST_USER_DEFINED,
-} WaitEventExtension;
+}			WaitEventExtension;
 
 extern void WaitEventExtensionShmemInit(void);
 extern Size WaitEventExtensionShmemSize(void);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 87c1aee379..c0749e4df9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -367,7 +367,6 @@ CatalogId
 CatalogIdMapEntry
 CatalogIndexState
 ChangeVarNodes_context
-ReplaceVarnoContext
 CheckPoint
 CheckPointStmt
 CheckpointStatsData
@@ -1276,9 +1275,9 @@ JsonManifestWALRangeField
 JsonObjectAgg
 JsonObjectConstructor
 JsonOutput
-JsonParseExpr
 JsonParseContext
 JsonParseErrorType
+JsonParseExpr
 JsonPath
 JsonPathBool
 JsonPathExecContext
@@ -1341,6 +1340,7 @@ LINE
 LLVMAttributeRef
 LLVMBasicBlockRef
 LLVMBuilderRef
+LLVMContextRef
 LLVMErrorRef
 LLVMIntPredicate
 LLVMJITEventListenerRef
@@ -1916,7 +1916,6 @@ ParallelHashJoinBatch
 ParallelHashJoinBatchAccessor
 ParallelHashJoinState
 ParallelIndexScanDesc
-ParallelReadyList
 ParallelSlot
 ParallelSlotArray
 ParallelSlotResultHandler
@@ -2343,6 +2342,7 @@ ReorderBufferUpdateProgressTxnCB
 ReorderTuple
 RepOriginId
 ReparameterizeForeignPathByChild_function
+ReplaceVarnoContext
 ReplaceVarsFromTargetList_context
 ReplaceVarsNoMatchOption
 ReplicaIdentityStmt
@@ -2919,6 +2919,7 @@ UnicodeNormalizationQC
 Unique
 UniquePath
 UniquePathMethod
+UniqueRelInfo
 UniqueState
 UnlistenStmt
 UnresolvedTup
@@ -2975,7 +2976,6 @@ VolatileFunctionStatus
 Vsrt
 WAIT_ORDER
 WALAvailability
-WalInsertClass
 WALInsertLock
 WALInsertLockPadded
 WALOpenSegment
@@ -2997,7 +2997,6 @@ WaitEvent
 WaitEventActivity
 WaitEventBufferPin
 WaitEventClient
-WaitEventExtension
 WaitEventExtensionCounterData
 WaitEventExtensionEntryById
 WaitEventExtensionEntryByName
@@ -3008,6 +3007,7 @@ WaitEventTimeout
 WaitPMResult
 WalCloseMethod
 WalCompression
+WalInsertClass
 WalLevel
 WalRcvData
 WalRcvExecResult
@@ -3021,7 +3021,6 @@ WalSnd
 WalSndCtlData
 WalSndSendDataCallback
 WalSndState
-WalSyncMethod
 WalTimeSample
 WalUsage
 WalWriteMethod
@@ -3407,6 +3406,7 @@ indexed_tlist
 inet
 inetKEY
 inet_struct
+initRowMethod
 init_function
 inline_cte_walker_context
 inline_error_callback_arg
@@ -3839,7 +3839,6 @@ unicodeStyleColumnFormat
 unicodeStyleFormat
 unicodeStyleRowFormat
 unicode_linestyle
-UniqueRelInfo
 unit_conversion
 unlogged_relation_entry
 utf_local_conversion_func
@@ -3875,7 +3874,6 @@ wchar2mb_with_len_converter
 wchar_t
 win32_deadchild_waitinfo
 wint_t
-worker_spi_state
 worker_state
 worktable
 wrap
-- 
2.30.1

#2Andres Freund
andres@anarazel.de
In reply to: Crisp Lee (#1)
Re: make pg_ctl more friendly

Hi,

On 2023-11-02 14:50:14 +0800, Crisp Lee wrote:

I got a basebackup using pg_basebackup -R. After that, I created a restore
point named test on primary, and set recovery_target_name to test,
recovery_target_action to shutdown in standby datadir. I got a failure
startup message after 'pg_ctl start -D $standby_datadir'. I think it is
not a failure, and makes users nervous, especially for newbies.

My thought is to generate a recovery.done file if the postmaster receives
exit code 3 from the startup process. When postmaster exits, pg_ctl will
give a more friendly message to users.

I think we can detect this without any additional state - pg_ctl already
accesses pg_control (via get_control_dbstate()). We should be able to detect
your case by issuing a different warning if

a) get_control_dbstate() at the start was *not* DB_SHUTDOWNED
b) get_control_dbstate() at the end is DB_SHUTDOWNED

Greetings,

Andres Freund

#3Crisp Lee
litianxiang01@gmail.com
In reply to: Andres Freund (#2)
Re: make pg_ctl more friendly

How to judge from 'DB_SHUTDOWNED' that PITR ends normally? 'DB_SHUTDOWNED'
is just a state, it could not give more meaning, so I reuse the
recovery.done.

On Sat, Nov 4, 2023 at 9:56 AM Andres Freund <andres@anarazel.de> wrote:

Show quoted text

Hi,

On 2023-11-02 14:50:14 +0800, Crisp Lee wrote:

I got a basebackup using pg_basebackup -R. After that, I created a

restore

point named test on primary, and set recovery_target_name to test,
recovery_target_action to shutdown in standby datadir. I got a failure
startup message after 'pg_ctl start -D $standby_datadir'. I think it is
not a failure, and makes users nervous, especially for newbies.

My thought is to generate a recovery.done file if the postmaster receives
exit code 3 from the startup process. When postmaster exits, pg_ctl will
give a more friendly message to users.

I think we can detect this without any additional state - pg_ctl already
accesses pg_control (via get_control_dbstate()). We should be able to
detect
your case by issuing a different warning if

a) get_control_dbstate() at the start was *not* DB_SHUTDOWNED
b) get_control_dbstate() at the end is DB_SHUTDOWNED

Greetings,

Andres Freund

#4Andres Freund
andres@anarazel.de
In reply to: Crisp Lee (#3)
Re: make pg_ctl more friendly

Hi,

On 2023-11-09 09:29:32 +0800, Crisp Lee wrote:

How to judge from 'DB_SHUTDOWNED' that PITR ends normally? 'DB_SHUTDOWNED'
is just a state, it could not give more meaning, so I reuse the
recovery.done.

DB_SHUTDOWNED cannot be encountered while recovery is ongoing. If there was a
hard crash, you'd see DB_IN_ARCHIVE_RECOVERY or such, if the command was shut
down orderly before PITR has finished, you'd see DB_SHUTDOWNED_IN_RECOVERY.

- Andres

#5Crisp Lee
litianxiang01@gmail.com
In reply to: Andres Freund (#4)
Re: make pg_ctl more friendly

Hi,

I know it. But my question is not that. I did a PITR operation with
recovery_target_name and recovery_target_action('shutdown'). The PITR
process was very short and the PITR was done before pg_ctl check. The
postmaster shutdown due to recovery_target_action, and there was no crash.
But pg_ctl told me about startup failure. I think the startup had
succeeded and the result was not a exception. pg_ctl should tell users
about detailed messages.

On Thu, Nov 9, 2023 at 9:32 AM Andres Freund <andres@anarazel.de> wrote:

Show quoted text

Hi,

On 2023-11-09 09:29:32 +0800, Crisp Lee wrote:

How to judge from 'DB_SHUTDOWNED' that PITR ends normally?

'DB_SHUTDOWNED'

is just a state, it could not give more meaning, so I reuse the
recovery.done.

DB_SHUTDOWNED cannot be encountered while recovery is ongoing. If there
was a
hard crash, you'd see DB_IN_ARCHIVE_RECOVERY or such, if the command was
shut
down orderly before PITR has finished, you'd see DB_SHUTDOWNED_IN_RECOVERY.

- Andres

#6Junwang Zhao
zhjwpku@gmail.com
In reply to: Crisp Lee (#5)
1 attachment(s)
Re: make pg_ctl more friendly

On Thu, Nov 9, 2023 at 9:57 AM Crisp Lee <litianxiang01@gmail.com> wrote:

Hi,

I know it. But my question is not that. I did a PITR operation with recovery_target_name and recovery_target_action('shutdown'). The PITR process was very short and the PITR was done before pg_ctl check. The postmaster shutdown due to recovery_target_action, and there was no crash. But pg_ctl told me about startup failure. I think the startup had succeeded and the result was not a exception. pg_ctl should tell users about detailed messages.

On Thu, Nov 9, 2023 at 9:32 AM Andres Freund <andres@anarazel.de> wrote:

Hi,

On 2023-11-09 09:29:32 +0800, Crisp Lee wrote:

How to judge from 'DB_SHUTDOWNED' that PITR ends normally? 'DB_SHUTDOWNED'
is just a state, it could not give more meaning, so I reuse the
recovery.done.

DB_SHUTDOWNED cannot be encountered while recovery is ongoing. If there was a
hard crash, you'd see DB_IN_ARCHIVE_RECOVERY or such, if the command was shut
down orderly before PITR has finished, you'd see DB_SHUTDOWNED_IN_RECOVERY.

- Andres

After a PITR shutdown, the db state should be *shut down in recovery*, try the
patch attached.

--
Regards
Junwang Zhao

Attachments:

0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchapplication/octet-stream; name=0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchDownload
From f5a8fe721e2cf099e0e8818c0512abbc0b5a657e Mon Sep 17 00:00:00 2001
From: Zhao Junwang <zhjwpku@gmail.com>
Date: Thu, 9 Nov 2023 15:03:38 +0800
Subject: [PATCH] PITR shutdown should not report error by pg_ctl

After a PITR recovery, the dbstate in pg_control is
DB_SHUTDOWNED_IN_RECOVERY, check this and do not report error
in pg_ctl.

Signed-off-by: Zhao Junwang <zhjwpku@gmail.com>
---
 src/bin/pg_ctl/pg_ctl.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3b145bd838..bc257dc060 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -46,6 +46,7 @@ typedef enum
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
 	POSTMASTER_FAILED,
+	POSTMASTER_RECOVERY_SHUTDOWN,
 } WaitPMResult;
 
 typedef enum
@@ -662,11 +663,19 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_RECOVERY_SHUTDOWN;
+				else return POSTMASTER_FAILED;
+			}
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+		{
+			if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+				return POSTMASTER_RECOVERY_SHUTDOWN;
+			else return POSTMASTER_FAILED;
+		}
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -991,10 +1000,14 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_RECOVERY_SHUTDOWN:
+				print_msg(_("PITR shutdown\n"));
+				print_msg(_("update configuration for startup again if needed\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-							   "Examine the log output.\n"),
+							   "Examine the log output\n"),
 							 progname);
 				exit(1);
 				break;
-- 
2.41.0

#7Junwang Zhao
zhjwpku@gmail.com
In reply to: Junwang Zhao (#6)
1 attachment(s)
Re: make pg_ctl more friendly

On Thu, Nov 9, 2023 at 3:08 PM Junwang Zhao <zhjwpku@gmail.com> wrote:

On Thu, Nov 9, 2023 at 9:57 AM Crisp Lee <litianxiang01@gmail.com> wrote:

Hi,

I know it. But my question is not that. I did a PITR operation with recovery_target_name and recovery_target_action('shutdown'). The PITR process was very short and the PITR was done before pg_ctl check. The postmaster shutdown due to recovery_target_action, and there was no crash. But pg_ctl told me about startup failure. I think the startup had succeeded and the result was not a exception. pg_ctl should tell users about detailed messages.

On Thu, Nov 9, 2023 at 9:32 AM Andres Freund <andres@anarazel.de> wrote:

Hi,

On 2023-11-09 09:29:32 +0800, Crisp Lee wrote:

How to judge from 'DB_SHUTDOWNED' that PITR ends normally? 'DB_SHUTDOWNED'
is just a state, it could not give more meaning, so I reuse the
recovery.done.

DB_SHUTDOWNED cannot be encountered while recovery is ongoing. If there was a
hard crash, you'd see DB_IN_ARCHIVE_RECOVERY or such, if the command was shut
down orderly before PITR has finished, you'd see DB_SHUTDOWNED_IN_RECOVERY.

- Andres

After a PITR shutdown, the db state should be *shut down in recovery*, try the
patch attached.

previous patch has some format issues, V2 attached.

--
Regards
Junwang Zhao

--
Regards
Junwang Zhao

Attachments:

v2-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchapplication/octet-stream; name=v2-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchDownload
From 4cf1d7bd8f4f4d542672c838608d1b77c1248836 Mon Sep 17 00:00:00 2001
From: Zhao Junwang <zhjwpku@gmail.com>
Date: Thu, 9 Nov 2023 15:03:38 +0800
Subject: [PATCH v2] PITR shutdown should not report error by pg_ctl

After a PITR recovery, the dbstate in pg_control is
DB_SHUTDOWNED_IN_RECOVERY, check this and do not report error
in pg_ctl.

Signed-off-by: Zhao Junwang <zhjwpku@gmail.com>
---
 src/bin/pg_ctl/pg_ctl.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3b145bd838..0f38a0124b 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -46,6 +46,7 @@ typedef enum
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
 	POSTMASTER_FAILED,
+	POSTMASTER_RECOVERY_SHUTDOWN,
 } WaitPMResult;
 
 typedef enum
@@ -662,11 +663,21 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_RECOVERY_SHUTDOWN;
+				else
+					return POSTMASTER_FAILED;
+			}
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+		{
+			if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+				return POSTMASTER_RECOVERY_SHUTDOWN;
+			else
+				return POSTMASTER_FAILED;
+		}
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -991,10 +1002,14 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_RECOVERY_SHUTDOWN:
+				print_msg(_("PITR shutdown\n"));
+				print_msg(_("update configuration for startup again if needed\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-							   "Examine the log output.\n"),
+							   "Examine the log output\n"),
 							 progname);
 				exit(1);
 				break;
-- 
2.41.0

#8Crisp Lee
litianxiang01@gmail.com
In reply to: Junwang Zhao (#7)
Re: make pg_ctl more friendly

Hi,

I thought the PITR shutdown was DB_SHUTDOWN. I made a mistake. The v2
attach looks good.

On Thu, Nov 9, 2023 at 3:19 PM Junwang Zhao <zhjwpku@gmail.com> wrote:

Show quoted text

On Thu, Nov 9, 2023 at 3:08 PM Junwang Zhao <zhjwpku@gmail.com> wrote:

On Thu, Nov 9, 2023 at 9:57 AM Crisp Lee <litianxiang01@gmail.com>

wrote:

Hi,

I know it. But my question is not that. I did a PITR operation with

recovery_target_name and recovery_target_action('shutdown'). The PITR
process was very short and the PITR was done before pg_ctl check. The
postmaster shutdown due to recovery_target_action, and there was no crash.
But pg_ctl told me about startup failure. I think the startup had
succeeded and the result was not a exception. pg_ctl should tell users
about detailed messages.

On Thu, Nov 9, 2023 at 9:32 AM Andres Freund <andres@anarazel.de>

wrote:

Hi,

On 2023-11-09 09:29:32 +0800, Crisp Lee wrote:

How to judge from 'DB_SHUTDOWNED' that PITR ends normally?

'DB_SHUTDOWNED'

is just a state, it could not give more meaning, so I reuse the
recovery.done.

DB_SHUTDOWNED cannot be encountered while recovery is ongoing. If

there was a

hard crash, you'd see DB_IN_ARCHIVE_RECOVERY or such, if the command

was shut

down orderly before PITR has finished, you'd see

DB_SHUTDOWNED_IN_RECOVERY.

- Andres

After a PITR shutdown, the db state should be *shut down in recovery*,

try the

patch attached.

previous patch has some format issues, V2 attached.

--
Regards
Junwang Zhao

--
Regards
Junwang Zhao

#9Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Junwang Zhao (#7)
Re: make pg_ctl more friendly

Hi,

Thank you for working on this! I agree that the current message is not friendly.

On Thu, 9 Nov 2023 at 10:19, Junwang Zhao <zhjwpku@gmail.com> wrote:

On Thu, Nov 9, 2023 at 3:08 PM Junwang Zhao <zhjwpku@gmail.com> wrote:

After a PITR shutdown, the db state should be *shut down in recovery*, try the
patch attached.

previous patch has some format issues, V2 attached.

v2-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patch:

-                               "Examine the log output.\n"),
+                               "Examine the log output\n"),
                              progname);

I don't think that this is needed.

Other than that, the patch looks good and I confirm that after PITR shutdown:

"PITR shutdown"
"update configuration for startup again if needed"

message shows up, instead of:

"pg_ctl: could not start server"
"Examine the log output.".

nitpick: It would be better if the order of the error message cases
and enums is the same ( i.e. 'POSTMASTER_RECOVERY_SHUTDOWN' before
'POSTMASTER_FAILED' in enum )

--
Regards,
Nazir Bilal Yavuz
Microsoft

#10Junwang Zhao
zhjwpku@gmail.com
In reply to: Nazir Bilal Yavuz (#9)
1 attachment(s)
Re: make pg_ctl more friendly

Hi Nazir,

On Tue, Jan 9, 2024 at 9:23 PM Nazir Bilal Yavuz <byavuz81@gmail.com> wrote:

Hi,

Thank you for working on this! I agree that the current message is not friendly.

On Thu, 9 Nov 2023 at 10:19, Junwang Zhao <zhjwpku@gmail.com> wrote:

On Thu, Nov 9, 2023 at 3:08 PM Junwang Zhao <zhjwpku@gmail.com> wrote:

After a PITR shutdown, the db state should be *shut down in recovery*, try the
patch attached.

previous patch has some format issues, V2 attached.

v2-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patch:

-                               "Examine the log output.\n"),
+                               "Examine the log output\n"),
progname);

I don't think that this is needed.

There seems to be no common sense for the ending dot when using
write_stderr, so I will leave this not changed.

Other than that, the patch looks good and I confirm that after PITR shutdown:

"PITR shutdown"
"update configuration for startup again if needed"

message shows up, instead of:

"pg_ctl: could not start server"
"Examine the log output.".

nitpick: It would be better if the order of the error message cases
and enums is the same ( i.e. 'POSTMASTER_RECOVERY_SHUTDOWN' before
'POSTMASTER_FAILED' in enum )

Agreed, fixed in V3

--
Regards,
Nazir Bilal Yavuz
Microsoft

--
Regards
Junwang Zhao

Attachments:

v3-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchapplication/octet-stream; name=v3-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchDownload
From c6de2b764d5010cff35a53e9c7b000d0d20a59dc Mon Sep 17 00:00:00 2001
From: Zhao Junwang <zhjwpku@gmail.com>
Date: Thu, 9 Nov 2023 15:03:38 +0800
Subject: [PATCH v3] PITR shutdown should not report error by pg_ctl

After a PITR recovery, the dbstate in pg_control is
DB_SHUTDOWNED_IN_RECOVERY, check this and do not report error
in pg_ctl.

Signed-off-by: Zhao Junwang <zhjwpku@gmail.com>
---
 src/bin/pg_ctl/pg_ctl.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3b145bd838..349f7e5abe 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -45,6 +45,7 @@ typedef enum
 {
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
+	POSTMASTER_RECOVERY_SHUTDOWN,
 	POSTMASTER_FAILED,
 } WaitPMResult;
 
@@ -662,11 +663,21 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_RECOVERY_SHUTDOWN;
+				else
+					return POSTMASTER_FAILED;
+			}
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+		{
+			if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+				return POSTMASTER_RECOVERY_SHUTDOWN;
+			else
+				return POSTMASTER_FAILED;
+		}
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -991,6 +1002,10 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_RECOVERY_SHUTDOWN:
+				print_msg(_("PITR shutdown\n"));
+				print_msg(_("update configuration for startup again if needed\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-- 
2.41.0

#11Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Junwang Zhao (#10)
Re: make pg_ctl more friendly

Hi,

On Wed, 10 Jan 2024 at 06:33, Junwang Zhao <zhjwpku@gmail.com> wrote:

Hi Nazir,

On Tue, Jan 9, 2024 at 9:23 PM Nazir Bilal Yavuz <byavuz81@gmail.com> wrote:

v2-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patch:

-                               "Examine the log output.\n"),
+                               "Examine the log output\n"),
progname);

I don't think that this is needed.

There seems to be no common sense for the ending dot when using
write_stderr, so I will leave this not changed.

Other than that, the patch looks good and I confirm that after PITR shutdown:

"PITR shutdown"
"update configuration for startup again if needed"

message shows up, instead of:

"pg_ctl: could not start server"
"Examine the log output.".

nitpick: It would be better if the order of the error message cases
and enums is the same ( i.e. 'POSTMASTER_RECOVERY_SHUTDOWN' before
'POSTMASTER_FAILED' in enum )

Agreed, fixed in V3

Thank you for the update. It looks good to me.

--
Regards,
Nazir Bilal Yavuz
Microsoft

#12Nathan Bossart
nathandbossart@gmail.com
In reply to: Junwang Zhao (#10)
Re: make pg_ctl more friendly

+ POSTMASTER_RECOVERY_SHUTDOWN,

Perhaps this should be POSTMASTER_SHUTDOWN_IN_RECOVERY to match the state
in the control file?

+			case POSTMASTER_RECOVERY_SHUTDOWN:
+				print_msg(_("PITR shutdown\n"));
+				print_msg(_("update configuration for startup again if needed\n"));
+				break;

I'm not sure I agree that this is a substantially friendlier message. From
a quick skim of the thread, it seems like you want to avoid sending a scary
error message if Postgres was intentionally shut down while in recovery.
If I got this particular message, I think I would be worried that something
went wrong during my point-in-time restore, and I'd be scrambling to figure
out what configuration this message wants me to update.

If I'm correctly interpreting the intent here, it might be worth fleshing
out the messages a bit more. For example, instead of "PITR shutdown,"
perhaps we could say "shut down while in recovery." And maybe we should
point to the specific settings in the latter message.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#13Junwang Zhao
zhjwpku@gmail.com
In reply to: Nathan Bossart (#12)
1 attachment(s)
Re: make pg_ctl more friendly

Hi Nathan,

On Tue, Jan 16, 2024 at 5:39 AM Nathan Bossart <nathandbossart@gmail.com> wrote:

+ POSTMASTER_RECOVERY_SHUTDOWN,

Perhaps this should be POSTMASTER_SHUTDOWN_IN_RECOVERY to match the state
in the control file?

Agreed

+                       case POSTMASTER_RECOVERY_SHUTDOWN:
+                               print_msg(_("PITR shutdown\n"));
+                               print_msg(_("update configuration for startup again if needed\n"));
+                               break;

I'm not sure I agree that this is a substantially friendlier message. From
a quick skim of the thread, it seems like you want to avoid sending a scary
error message if Postgres was intentionally shut down while in recovery.
If I got this particular message, I think I would be worried that something
went wrong during my point-in-time restore, and I'd be scrambling to figure
out what configuration this message wants me to update.

If I'm correctly interpreting the intent here, it might be worth fleshing
out the messages a bit more. For example, instead of "PITR shutdown,"
perhaps we could say "shut down while in recovery."

Make sense. Fixed. See V4

And maybe we should
point to the specific settings in the latter message.

I've changed this latter message to:
update recovery target settings for startup again if needed

What do you think?

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

--
Regards
Junwang Zhao

Attachments:

v4-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchapplication/octet-stream; name=v4-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchDownload
From e736445a862f63c51c8b5d89b9d1a25cbd0f4c33 Mon Sep 17 00:00:00 2001
From: Zhao Junwang <zhjwpku@gmail.com>
Date: Thu, 9 Nov 2023 15:03:38 +0800
Subject: [PATCH v4] PITR shutdown should not report error by pg_ctl

After a PITR recovery, the dbstate in pg_control is
DB_SHUTDOWNED_IN_RECOVERY, check this and do not report error
in pg_ctl.

Signed-off-by: Zhao Junwang <zhjwpku@gmail.com>
---
 src/bin/pg_ctl/pg_ctl.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 6900b27675..389becf421 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -45,6 +45,7 @@ typedef enum
 {
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
+	POSTMASTER_SHUTDOWN_IN_RECOVERY,
 	POSTMASTER_FAILED,
 } WaitPMResult;
 
@@ -662,11 +663,21 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_SHUTDOWN_IN_RECOVERY;
+				else
+					return POSTMASTER_FAILED;
+			}
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+		{
+			if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+				return POSTMASTER_SHUTDOWN_IN_RECOVERY;
+			else
+				return POSTMASTER_FAILED;
+		}
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -991,6 +1002,10 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_SHUTDOWN_IN_RECOVERY:
+				print_msg(_(" shut down while in recovery\n"));
+				print_msg(_("update recovery target settings for startup again if needed\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-- 
2.41.0

#14Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Junwang Zhao (#13)
Re: make pg_ctl more friendly

I think this needs more comments. First, in the WaitPMResult enum, we
currently have three values -- READY, STILL_STARTING, FAILED. These are
all pretty self-explanatory. But this patch adds SHUTDOWN_IN_RECOVERY,
and that's not at all self-explanatory. Did postmaster start or not?
The enum value's name doesn't make that clear. So I'd do something like

typedef enum
{
POSTMASTER_READY,
POSTMASTER_STILL_STARTING,
/*
* postmaster no longer running, because it stopped after recovery
* completed.
*/
POSTMASTER_SHUTDOWN_IN_RECOVERY,
POSTMASTER_FAILED,
} WaitPMResult;

Maybe put the comments in wait_for_postmaster_start instead.

Secondly, the patch proposes to add new text to be returned by
do_start() when this happens, which would look like this:

waiting for server to start... shut down while in recovery
update recovery target settings for startup again if needed

Is this crystal clear? I'm not sure. How about something like this?

waiting for server to start... done, but not running
server shut down because of recovery target settings

variations on the first phrase:

"done, no longer running"
"done, but no longer running"
"done, automatically shut down"
"done, automatically shut down after recovery"

--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"Now I have my system running, not a byte was off the shelf;
It rarely breaks and when it does I fix the code myself.
It's stable, clean and elegant, and lightning fast as well,
And it doesn't cost a nickel, so Bill Gates can go to hell."

#15Junwang Zhao
zhjwpku@gmail.com
In reply to: Alvaro Herrera (#14)
1 attachment(s)
Re: make pg_ctl more friendly

Hi Alvaro,

On Wed, Jan 17, 2024 at 4:54 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

I think this needs more comments. First, in the WaitPMResult enum, we
currently have three values -- READY, STILL_STARTING, FAILED. These are
all pretty self-explanatory. But this patch adds SHUTDOWN_IN_RECOVERY,
and that's not at all self-explanatory. Did postmaster start or not?
The enum value's name doesn't make that clear. So I'd do something like

typedef enum
{
POSTMASTER_READY,
POSTMASTER_STILL_STARTING,
/*
* postmaster no longer running, because it stopped after recovery
* completed.
*/
POSTMASTER_SHUTDOWN_IN_RECOVERY,
POSTMASTER_FAILED,
} WaitPMResult;

Maybe put the comments in wait_for_postmaster_start instead.

I put the comments in WaitPMResult since we need to add two
of those if in wait_for_postmaster_start.

Secondly, the patch proposes to add new text to be returned by
do_start() when this happens, which would look like this:

waiting for server to start... shut down while in recovery
update recovery target settings for startup again if needed

Is this crystal clear? I'm not sure. How about something like this?

waiting for server to start... done, but not running
server shut down because of recovery target settings

Agreed.

variations on the first phrase:

"done, no longer running"
"done, but no longer running"
"done, automatically shut down"
"done, automatically shut down after recovery"

I chose the last one because it gives information to users.
See V5, thanks for the wording ;)

--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"Now I have my system running, not a byte was off the shelf;
It rarely breaks and when it does I fix the code myself.
It's stable, clean and elegant, and lightning fast as well,
And it doesn't cost a nickel, so Bill Gates can go to hell."

--
Regards
Junwang Zhao

Attachments:

v5-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchapplication/octet-stream; name=v5-0001-PITR-shutdown-should-not-report-error-by-pg_ctl.patchDownload
From f0d0098a1ffbd7a319db6e37329dd9b7e2588d3f Mon Sep 17 00:00:00 2001
From: Zhao Junwang <zhjwpku@gmail.com>
Date: Thu, 9 Nov 2023 15:03:38 +0800
Subject: [PATCH v5] PITR shutdown should not report error by pg_ctl

After a PITR recovery, the dbstate in pg_control is
DB_SHUTDOWNED_IN_RECOVERY, check this and do not report error
in pg_ctl.

Signed-off-by: Zhao Junwang <zhjwpku@gmail.com>
---
 src/bin/pg_ctl/pg_ctl.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 6900b27675..22c4bbc71b 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -45,6 +45,11 @@ typedef enum
 {
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
+	/*
+	 * postmaster no longer running, because it stopped after recovery
+	 * completed.
+	 */
+	POSTMASTER_SHUTDOWN_IN_RECOVERY,
 	POSTMASTER_FAILED,
 } WaitPMResult;
 
@@ -662,11 +667,21 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 			int			exitstatus;
 
 			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_SHUTDOWN_IN_RECOVERY;
+				else
+					return POSTMASTER_FAILED;
+			}
 		}
 #else
 		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+		{
+			if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+				return POSTMASTER_SHUTDOWN_IN_RECOVERY;
+			else
+				return POSTMASTER_FAILED;
+		}
 #endif
 
 		/* Startup still in process; wait, printing a dot once per second */
@@ -991,6 +1006,10 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_SHUTDOWN_IN_RECOVERY:
+				print_msg(_(" done, automatically shut down after recovery\n"));
+				print_msg(_("server shut down because of recovery target settings\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-- 
2.41.0

#16Laurenz Albe
laurenz.albe@cybertec.at
In reply to: Junwang Zhao (#15)
1 attachment(s)
Re: make pg_ctl more friendly

On Wed, 2024-01-17 at 17:33 +0800, Junwang Zhao wrote:

On Wed, Jan 17, 2024 at 4:54 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

I think this needs more comments. First, in the WaitPMResult enum, we
currently have three values -- READY, STILL_STARTING, FAILED. These are
all pretty self-explanatory. But this patch adds SHUTDOWN_IN_RECOVERY,
and that's not at all self-explanatory. Did postmaster start or not?
The enum value's name doesn't make that clear. So I'd do something like

typedef enum
{
POSTMASTER_READY,
POSTMASTER_STILL_STARTING,
/*
* postmaster no longer running, because it stopped after recovery
* completed.
*/
POSTMASTER_SHUTDOWN_IN_RECOVERY,
POSTMASTER_FAILED,
} WaitPMResult;

Maybe put the comments in wait_for_postmaster_start instead.

I put the comments in WaitPMResult since we need to add two
of those if in wait_for_postmaster_start.

I don't think that any comment is needed; the name says it all.

Secondly, the patch proposes to add new text to be returned by
do_start() when this happens, which would look like this:

waiting for server to start... shut down while in recovery
update recovery target settings for startup again if needed

Is this crystal clear? I'm not sure. How about something like this?

waiting for server to start... done, but not running
server shut down because of recovery target settings

variations on the first phrase:

"done, no longer running"
"done, but no longer running"
"done, automatically shut down"
"done, automatically shut down after recovery"

I chose the last one because it gives information to users.
See V5, thanks for the wording ;)

Why not just leave it at plain "done"?
After all, the server was started successfully.
The second message should make sufficiently clear that the server has stopped.

I didn't like the code duplication introduced by the patch, so I rewrote
that part a bit.

Attached is my suggestion.

I'll set the status to "waiting for author"; if you are fine with my version,
I think that the patch is "ready for committer".

Yours,
Laurenz Albe

Attachments:

v6-0001-Improve-pg_ctl-message-for-shutdown-after-recover.patchtext/x-patch; charset=ISO-8859-1; name=v6-0001-Improve-pg_ctl-message-for-shutdown-after-recover.patchDownload
From 538c662aa562a5cc7630ac845e1b8a6b4abfffa1 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Tue, 9 Jul 2024 21:48:41 +0200
Subject: [PATCH v6] Improve pg_ctl message for shutdown after recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the postmaster exits after a successful point-in-time-recovery,
pg_ctl used to report a message that indicated a problem.

Author: Zhao Junwang, Crisp Lee
Reviewed by: Nazir Bilal Yavuz, Nathan Bossart, Álvaro Herrera, Laurenz Albe
Discussion: https://postgr.es/m/CAGHPtV7GttPZ-HvxZuYRy70jLGQMEm5%3DLQc4fKGa%3DJ74m2VZbg%40mail.gmail.com
---
 src/bin/pg_ctl/pg_ctl.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 405e223c19..324a6da096 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -45,6 +45,7 @@ typedef enum
 {
 	POSTMASTER_READY,
 	POSTMASTER_STILL_STARTING,
+	POSTMASTER_SHUTDOWN_IN_RECOVERY,
 	POSTMASTER_FAILED,
 } WaitPMResult;
 
@@ -657,17 +658,23 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
 		 * On Windows, we may be checking the postmaster's parent shell, but
 		 * that's fine for this purpose.
 		 */
-#ifndef WIN32
 		{
+			bool		pm_died;
+#ifndef WIN32
 			int			exitstatus;
 
-			if (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid)
-				return POSTMASTER_FAILED;
-		}
+			pm_died = (waitpid(pm_pid, &exitstatus, WNOHANG) == pm_pid);
 #else
-		if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
-			return POSTMASTER_FAILED;
+			pm_died = (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0);
 #endif
+			if (pm_died)
+			{
+				if (get_control_dbstate() == DB_SHUTDOWNED_IN_RECOVERY)
+					return POSTMASTER_SHUTDOWN_IN_RECOVERY;
+				else
+					return POSTMASTER_FAILED;
+			}
+		}
 
 		/* Startup still in process; wait, printing a dot once per second */
 		if (i % WAITS_PER_SEC == 0)
@@ -991,6 +998,10 @@ do_start(void)
 							 progname);
 				exit(1);
 				break;
+			case POSTMASTER_SHUTDOWN_IN_RECOVERY:
+				print_msg(_(" done\n"));
+				print_msg(_("server shut down because of recovery target settings\n"));
+				break;
 			case POSTMASTER_FAILED:
 				print_msg(_(" stopped waiting\n"));
 				write_stderr(_("%s: could not start server\n"
-- 
2.45.2

#17Junwang Zhao
zhjwpku@gmail.com
In reply to: Laurenz Albe (#16)
Re: make pg_ctl more friendly

Hi, Laurenz

On Wed, Jul 10, 2024 at 3:59 AM Laurenz Albe <laurenz.albe@cybertec.at> wrote:

On Wed, 2024-01-17 at 17:33 +0800, Junwang Zhao wrote:

On Wed, Jan 17, 2024 at 4:54 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

I think this needs more comments. First, in the WaitPMResult enum, we
currently have three values -- READY, STILL_STARTING, FAILED. These are
all pretty self-explanatory. But this patch adds SHUTDOWN_IN_RECOVERY,
and that's not at all self-explanatory. Did postmaster start or not?
The enum value's name doesn't make that clear. So I'd do something like

typedef enum
{
POSTMASTER_READY,
POSTMASTER_STILL_STARTING,
/*
* postmaster no longer running, because it stopped after recovery
* completed.
*/
POSTMASTER_SHUTDOWN_IN_RECOVERY,
POSTMASTER_FAILED,
} WaitPMResult;

Maybe put the comments in wait_for_postmaster_start instead.

I put the comments in WaitPMResult since we need to add two
of those if in wait_for_postmaster_start.

I don't think that any comment is needed; the name says it all.

Secondly, the patch proposes to add new text to be returned by
do_start() when this happens, which would look like this:

waiting for server to start... shut down while in recovery
update recovery target settings for startup again if needed

Is this crystal clear? I'm not sure. How about something like this?

waiting for server to start... done, but not running
server shut down because of recovery target settings

variations on the first phrase:

"done, no longer running"
"done, but no longer running"
"done, automatically shut down"
"done, automatically shut down after recovery"

I chose the last one because it gives information to users.
See V5, thanks for the wording ;)

Why not just leave it at plain "done"?
After all, the server was started successfully.
The second message should make sufficiently clear that the server has stopped.

I didn't like the code duplication introduced by the patch, so I rewrote
that part a bit.

Attached is my suggestion.

The patch LGTM.

I'll set the status to "waiting for author"; if you are fine with my version,
I think that the patch is "ready for committer".

I've set it to "ready for committer", thanks.

Yours,
Laurenz Albe

--
Regards
Junwang Zhao

#18Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Junwang Zhao (#17)
Re: make pg_ctl more friendly

On 2024/07/10 11:45, Junwang Zhao wrote:

Attached is my suggestion.

The patch LGTM.

+			case POSTMASTER_SHUTDOWN_IN_RECOVERY:
+				print_msg(_(" done\n"));
+				print_msg(_("server shut down because of recovery target settings\n"));

"because of recovery target settings" isn't always accurate.
For example, if the DBA shuts down the server during recovery,
POSTMASTER_SHUTDOWN_IN_RECOVERY can be returned regardless of
the recovery target settings. Should we change the message to
something like "server shut down in recovery" for accuracy?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#19Tom Lane
tgl@sss.pgh.pa.us
In reply to: Junwang Zhao (#17)
Re: make pg_ctl more friendly

Junwang Zhao <zhjwpku@gmail.com> writes:

On Wed, Jul 10, 2024 at 3:59 AM Laurenz Albe <laurenz.albe@cybertec.at> wrote:

Attached is my suggestion.

The patch LGTM.

I'll set the status to "waiting for author"; if you are fine with my version,
I think that the patch is "ready for committer".

I've set it to "ready for committer", thanks.

Pushed, thanks.

regards, tom lane

#20Tom Lane
tgl@sss.pgh.pa.us
In reply to: Fujii Masao (#18)
Re: make pg_ctl more friendly

Fujii Masao <masao.fujii@oss.nttdata.com> writes:

"because of recovery target settings" isn't always accurate.
For example, if the DBA shuts down the server during recovery,
POSTMASTER_SHUTDOWN_IN_RECOVERY can be returned regardless of
the recovery target settings. Should we change the message to
something like "server shut down in recovery" for accuracy?

Hmm, I just pushed it with Laurenz's wording. I don't mind
if we change it again, but I'm not sure that there's much
wrong with it as it stands. Keep in mind that the context
is the DBA doing "pg_ctl start". It seems unlikely that
he/she would concurrently do "pg_ctl stop". Even if that
did happen, do we really need to phrase the message to account
for it?

I like Laurenz's wording because it points the user in the
direction of the settings that would need adjustment if an
immediate shutdown wasn't what was expected/wanted. If we
just say "shut down in recovery", that may be accurate,
but it offers little help as to what to do next.

regards, tom lane

#21Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Tom Lane (#20)
Re: make pg_ctl more friendly

On 2024/07/19 2:58, Tom Lane wrote:

Fujii Masao <masao.fujii@oss.nttdata.com> writes:

"because of recovery target settings" isn't always accurate.
For example, if the DBA shuts down the server during recovery,
POSTMASTER_SHUTDOWN_IN_RECOVERY can be returned regardless of
the recovery target settings. Should we change the message to
something like "server shut down in recovery" for accuracy?

Hmm, I just pushed it with Laurenz's wording. I don't mind
if we change it again, but I'm not sure that there's much
wrong with it as it stands. Keep in mind that the context
is the DBA doing "pg_ctl start". It seems unlikely that
he/she would concurrently do "pg_ctl stop". Even if that
did happen, do we really need to phrase the message to account
for it?

I like Laurenz's wording because it points the user in the
direction of the settings that would need adjustment if an
immediate shutdown wasn't what was expected/wanted. If we
just say "shut down in recovery", that may be accurate,
but it offers little help as to what to do next.

I was thinking the scenario where "pg_ctl -w start" exits due to
a recovery target setting, especially with recovery_target_action=shutdown,
can happen not so many times. This is because the server typically
can reach PM_STATUS_READY or PM_STATUS_STANDBY,
and pg_ctl exits normally before the recovery target is reached.

On the other thand, if users start the crash recovery and find
misconfiguration of parameter requiring a server restart,
they might shut down the server during recovery to fix it.
In this case, mentioning "recovery target" could be confusing.
This scenario also might not be so common, but seems a bit more
likely than the recovery target case. I understand this might be
a minority opinion, though..

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION