From 2075441bebc47d3dd5b6e0a76e16f5ebb12858af Mon Sep 17 00:00:00 2001
From: Jehan-Guillaume de Rorthais <jgdr@dalibo.com>
Date: Fri, 10 Apr 2020 18:01:45 +0200
Subject: [PATCH] Demote PoC

---
 src/backend/access/transam/xlog.c       |   3 +-
 src/backend/postmaster/postmaster.c     | 206 ++++++++++++++++++------
 src/bin/pg_controldata/pg_controldata.c |   2 +
 src/bin/pg_ctl/pg_ctl.c                 | 105 ++++++++++++
 src/include/catalog/pg_control.h        |   1 +
 src/include/libpq/libpq-be.h            |   7 +-
 src/include/utils/pidfile.h             |   1 +
 7 files changed, 271 insertions(+), 54 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 55cac186dc..8a7f1a0855 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8493,6 +8493,7 @@ ShutdownXLOG(int code, Datum arg)
 	CurrentResourceOwner = AuxProcessResourceOwner;
 
 	/* Don't be chatty in standalone mode */
+	// FIXME: what message when demoting?
 	ereport(IsPostmasterEnvironment ? LOG : NOTICE,
 			(errmsg("shutting down")));
 
@@ -8760,7 +8761,7 @@ CreateCheckPoint(int flags)
 	if (shutdown)
 	{
 		LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-		ControlFile->state = DB_SHUTDOWNING;
+		ControlFile->state = DB_SHUTDOWNING; // DEMOTING?
 		ControlFile->time = (pg_time_t) time(NULL);
 		UpdateControlFile();
 		LWLockRelease(ControlFileLock);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b4d475bb0b..465d020f9d 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -150,6 +150,9 @@
 
 #define BACKEND_TYPE_WORKER		(BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
 
+/* file to signal demotion from primary to standby */
+#define DEMOTE_SIGNAL_FILE		"demote"
+
 /*
  * List of active backends (or child processes anyway; we don't actually
  * know whether a given child has become a backend or is still in the
@@ -269,18 +272,23 @@ typedef enum
 static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
 
 /* Startup/shutdown state */
-#define			NoShutdown		0
-#define			SmartShutdown	1
-#define			FastShutdown	2
-#define			ImmediateShutdown	3
-
-static int	Shutdown = NoShutdown;
+typedef enum StepDownState {	/* order matters: compared with < and >= */
+	NoShutdown = 0, /* no step down requested; find better label? */
+	SmartShutdown,	/* SIGTERM received, no demote request pending */
+	SmartDemote,	/* SIGTERM received along with a demote request */
+	FastShutdown,	/* SIGINT received, no demote request pending */
+	FastDemote,		/* SIGINT received along with a demote request */
+	ImmediateShutdown	/* immediate shutdown requested; never a demote */
+} StepDownState;
+
+static StepDownState StepDown = NoShutdown;
+static bool DemoteSignal = false; /* true on demote request */
 
 static bool FatalError = false; /* T if recovering from backend crash */
 
 /*
- * We use a simple state machine to control startup, shutdown, and
- * crash recovery (which is rather like shutdown followed by startup).
+ * We use a simple state machine to control startup, shutdown, demotion and
+ * crash recovery (the latter two are rather like shutdown followed by startup).
  *
  * After doing all the postmaster initialization work, we enter PM_STARTUP
  * state and the startup process is launched. The startup process begins by
@@ -314,7 +322,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
  * will not be very long).
  *
  * Notice that this state variable does not distinguish *why* we entered
- * states later than PM_RUN --- Shutdown and FatalError must be consulted
+ * states later than PM_RUN --- StepDown and FatalError must be consulted
  * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
  * states, nor in PM_SHUTDOWN states (because we don't enter those states
  * when trying to recover from a crash).  It can be true in PM_STARTUP state,
@@ -414,6 +422,8 @@ static bool RandomCancelKey(int32 *cancel_key);
 static void signal_child(pid_t pid, int signal);
 static bool SignalSomeChildren(int signal, int targets);
 static void TerminateChildren(int signal);
+static bool CheckDemoteSignal(void);
+
 
 #define SignalChildren(sig)			   SignalSomeChildren(sig, BACKEND_TYPE_ALL)
 
@@ -1550,7 +1560,7 @@ DetermineSleepTime(struct timeval *timeout)
 	 * Normal case: either there are no background workers at all, or we're in
 	 * a shutdown sequence (during which we ignore bgworkers altogether).
 	 */
-	if (Shutdown > NoShutdown ||
+	if (StepDown > NoShutdown ||
 		(!StartWorkerNeeded && !HaveCrashedWorker))
 	{
 		if (AbortStartTime != 0)
@@ -1830,7 +1840,7 @@ ServerLoop(void)
 		 *
 		 * Note we also do this during recovery from a process crash.
 		 */
-		if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
+		if ((StepDown >= ImmediateShutdown || (FatalError && !SendStop)) &&
 			AbortStartTime != 0 &&
 			(now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
 		{
@@ -2305,6 +2315,11 @@ retry1:
 					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
 					 errmsg("the database system is starting up")));
 			break;
+		case CAC_DEMOTE:
+			ereport(FATAL,
+					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
+					 errmsg("the database system is demoting")));
+			break;
 		case CAC_SHUTDOWN:
 			ereport(FATAL,
 					(errcode(ERRCODE_CANNOT_CONNECT_NOW),
@@ -2436,7 +2451,7 @@ canAcceptConnections(int backend_type)
 	CAC_state	result = CAC_OK;
 
 	/*
-	 * Can't start backends when in startup/shutdown/inconsistent recovery
+	 * Can't start backends when in startup/demote/shutdown/inconsistent recovery
 	 * state.  We treat autovac workers the same as user backends for this
 	 * purpose.  However, bgworkers are excluded from this test; we expect
 	 * bgworker_should_start_now() decided whether the DB state allows them.
@@ -2452,7 +2467,9 @@ canAcceptConnections(int backend_type)
 	{
 		if (pmState == PM_WAIT_BACKUP)
 			result = CAC_WAITBACKUP;	/* allow superusers only */
-		else if (Shutdown > NoShutdown)
+		else if (StepDown == SmartDemote || StepDown == FastDemote)
+			return CAC_DEMOTE;	/* demote is pending */
+		else if (StepDown > NoShutdown)
 			return CAC_SHUTDOWN;	/* shutdown is pending */
 		else if (!FatalError &&
 				 (pmState == PM_STARTUP ||
@@ -2683,7 +2700,8 @@ SIGHUP_handler(SIGNAL_ARGS)
 	PG_SETMASK(&BlockSig);
 #endif
 
-	if (Shutdown <= SmartShutdown)
+	if (StepDown == NoShutdown || StepDown == SmartShutdown ||
+		StepDown == SmartDemote)
 	{
 		ereport(LOG,
 				(errmsg("received SIGHUP, reloading configuration files")));
@@ -2769,26 +2787,72 @@ pmdie(SIGNAL_ARGS)
 			(errmsg_internal("postmaster received signal %d",
 							 postgres_signal_arg)));
 
+	if (CheckDemoteSignal())
+	{
+		if (pmState != PM_RUN)
+		{
+			DemoteSignal = false;
+			unlink(DEMOTE_SIGNAL_FILE);
+			ereport(LOG,
+					(errmsg("ignoring demote signal because already in standby mode")));
+		}
+		else if (postgres_signal_arg == SIGQUIT) {
+			DemoteSignal = false;
+			ereport(WARNING,
+					(errmsg("can not demote in immediate stop mode")));
+			// FIXME: should we abort the shutdown process?
+		}
+		else
+		{
+			FILE	   *standby_file;
+
+			DemoteSignal = true;
+
+			/* create the standby signal file */
+			standby_file = AllocateFile(STANDBY_SIGNAL_FILE, "w");
+			if (!standby_file)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not create file \"%s\": %m",
+								STANDBY_SIGNAL_FILE)));
+
+			if (FreeFile(standby_file))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not write file \"%s\": %m",
+								STANDBY_SIGNAL_FILE)));
+		}
+
+		unlink(DEMOTE_SIGNAL_FILE);
+	}
+
 	switch (postgres_signal_arg)
 	{
 		case SIGTERM:
 
 			/*
-			 * Smart Shutdown:
+			 * Smart Stepdown:
 			 *
-			 * Wait for children to end their work, then shut down.
+			 * Wait for children to end their work, then shut down or demote.
 			 */
-			if (Shutdown >= SmartShutdown)
+			if (StepDown >= SmartShutdown)
 				break;
-			Shutdown = SmartShutdown;
-			ereport(LOG,
-					(errmsg("received smart shutdown request")));
 
-			/* Report status */
-			AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
+			if (DemoteSignal) {
+				StepDown = SmartDemote;
+				ereport(LOG, (errmsg("received smart demote request")));
+				/* Report status */
+				AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_DEMOTING);
+			}
+			else {
+				StepDown = SmartShutdown;
+				ereport(LOG, (errmsg("received smart shutdown request")));
+				/* Report status */
+				AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
 #ifdef USE_SYSTEMD
-			sd_notify(0, "STOPPING=1");
+				sd_notify(0, "STOPPING=1");
 #endif
+			}
 
 			if (pmState == PM_RUN || pmState == PM_RECOVERY ||
 				pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
@@ -2831,22 +2895,29 @@ pmdie(SIGNAL_ARGS)
 		case SIGINT:
 
 			/*
-			 * Fast Shutdown:
+			 * Fast StepDown:
 			 *
 			 * Abort all children with SIGTERM (rollback active transactions
-			 * and exit) and shut down when they are gone.
+			 * and exit) and shut down or demote when they are gone.
 			 */
-			if (Shutdown >= FastShutdown)
+			if (StepDown >= FastShutdown)
 				break;
-			Shutdown = FastShutdown;
-			ereport(LOG,
-					(errmsg("received fast shutdown request")));
 
-			/* Report status */
-			AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
+			if (DemoteSignal) {
+				StepDown = FastDemote;
+				ereport(LOG, (errmsg("received fast demote request")));
+				/* Report status */
+				AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_DEMOTING);
+			}
+			else {
+				StepDown = FastShutdown;
+				ereport(LOG, (errmsg("received fast shutdown request")));
+				/* Report status */
+				AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
 #ifdef USE_SYSTEMD
-			sd_notify(0, "STOPPING=1");
+				sd_notify(0, "STOPPING=1");
 #endif
+			}
 
 			if (StartupPID != 0)
 				signal_child(StartupPID, SIGTERM);
@@ -2903,9 +2974,9 @@ pmdie(SIGNAL_ARGS)
 			 * terminate remaining ones with SIGKILL, then exit without
 			 * attempt to properly shut down the data base system.
 			 */
-			if (Shutdown >= ImmediateShutdown)
+			if (StepDown >= ImmediateShutdown)
 				break;
-			Shutdown = ImmediateShutdown;
+			StepDown = ImmediateShutdown;
 			ereport(LOG,
 					(errmsg("received immediate shutdown request")));
 
@@ -2967,10 +3038,11 @@ reaper(SIGNAL_ARGS)
 			StartupPID = 0;
 
 			/*
-			 * Startup process exited in response to a shutdown request (or it
-			 * completed normally regardless of the shutdown request).
+			 * Startup process exited in response to a shutdown or demote
+			 * request (or it completed normally regardless of the shutdown
+			 * request).
 			 */
-			if (Shutdown > NoShutdown &&
+			if (StepDown > NoShutdown &&
 				(EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
 			{
 				StartupStatus = STARTUP_NOT_RUNNING;
@@ -2984,7 +3056,7 @@ reaper(SIGNAL_ARGS)
 				ereport(LOG,
 						(errmsg("shutdown at recovery target")));
 				StartupStatus = STARTUP_NOT_RUNNING;
-				Shutdown = SmartShutdown;
+				StepDown = SmartShutdown;
 				TerminateChildren(SIGTERM);
 				pmState = PM_WAIT_BACKENDS;
 				/* PostmasterStateMachine logic does the rest */
@@ -3124,7 +3196,7 @@ reaper(SIGNAL_ARGS)
 				 * archive cycle and quit. Likewise, if we have walsender
 				 * processes, tell them to send any remaining WAL and quit.
 				 */
-				Assert(Shutdown > NoShutdown);
+				Assert(StepDown > NoShutdown);
 
 				/* Waken archiver for the last time */
 				if (PgArchPID != 0)
@@ -3484,7 +3556,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 	 * signaled children, nonzero exit status is to be expected, so don't
 	 * clutter log.
 	 */
-	take_action = !FatalError && Shutdown != ImmediateShutdown;
+	take_action = !FatalError && StepDown != ImmediateShutdown;
 
 	if (take_action)
 	{
@@ -3702,7 +3774,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 
 	/* We do NOT restart the syslogger */
 
-	if (Shutdown != ImmediateShutdown)
+	if (StepDown != ImmediateShutdown)
 		FatalError = true;
 
 	/* We now transit into a state of waiting for children to die */
@@ -3845,11 +3917,11 @@ PostmasterStateMachine(void)
 			WalReceiverPID == 0 &&
 			BgWriterPID == 0 &&
 			(CheckpointerPID == 0 ||
-			 (!FatalError && Shutdown < ImmediateShutdown)) &&
+			 (!FatalError && StepDown < ImmediateShutdown)) &&
 			WalWriterPID == 0 &&
 			AutoVacPID == 0)
 		{
-			if (Shutdown >= ImmediateShutdown || FatalError)
+			if (StepDown >= ImmediateShutdown || FatalError)
 			{
 				/*
 				 * Start waiting for dead_end children to die.  This state
@@ -3870,7 +3942,7 @@ PostmasterStateMachine(void)
 				 * the regular children are gone, and it's time to tell the
 				 * checkpointer to do a shutdown checkpoint.
 				 */
-				Assert(Shutdown > NoShutdown);
+				Assert(StepDown > NoShutdown);
 				/* Start the checkpointer if not running */
 				if (CheckpointerPID == 0)
 					CheckpointerPID = StartCheckpointer();
@@ -3958,7 +4030,8 @@ PostmasterStateMachine(void)
 	 * EOF on its input pipe, which happens when there are no more upstream
 	 * processes.
 	 */
-	if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
+	if (pmState == PM_NO_CHILDREN && (StepDown == SmartShutdown ||
+		StepDown == FastShutdown || StepDown == ImmediateShutdown))
 	{
 		if (FatalError)
 		{
@@ -3991,15 +4064,29 @@ PostmasterStateMachine(void)
 	 * startup process fails, because more than likely it will just fail again
 	 * and we will keep trying forever.
 	 */
-	if (pmState == PM_NO_CHILDREN &&
+	if (pmState == PM_NO_CHILDREN && !DemoteSignal &&
 		(StartupStatus == STARTUP_CRASHED || !restart_after_crash))
 		ExitPostmaster(1);
 
+	/* Handle demote signal: all children are gone, restart as a standby */
+	if (DemoteSignal && pmState == PM_NO_CHILDREN)
+	{
+		ereport(LOG, (errmsg("all server processes terminated; demoting")));
+
+		/* FIXME: signal bgworkers? */
+		DemoteSignal = false;	/* request consumed; later stops must not demote */
+		StartupPID = StartupDataBase();
+		Assert(StartupPID != 0);
+		StartupStatus = STARTUP_RUNNING;
+		pmState = PM_STARTUP;
+		StepDown = NoShutdown;
+	}
+
 	/*
 	 * If we need to recover from a crash, wait for all non-syslogger children
 	 * to exit, then reset shmem and StartupDataBase.
 	 */
-	if (FatalError && pmState == PM_NO_CHILDREN)
+	else if (FatalError && pmState == PM_NO_CHILDREN)
 	{
 		ereport(LOG,
 				(errmsg("all server processes terminated; reinitializing")));
@@ -5195,7 +5282,7 @@ sigusr1_handler(SIGNAL_ARGS)
 	 * first. We don't want to go back to recovery in that case.
 	 */
 	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
-		pmState == PM_STARTUP && Shutdown == NoShutdown)
+		pmState == PM_STARTUP && StepDown == NoShutdown)
 	{
 		/* WAL redo has started. We're out of reinitialization. */
 		FatalError = false;
@@ -5234,7 +5321,7 @@ sigusr1_handler(SIGNAL_ARGS)
 		pmState = PM_RECOVERY;
 	}
 	if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
-		pmState == PM_RECOVERY && Shutdown == NoShutdown)
+		pmState == PM_RECOVERY && StepDown == NoShutdown)
 	{
 		/*
 		 * Likewise, start other special children as needed.
@@ -5284,7 +5371,7 @@ sigusr1_handler(SIGNAL_ARGS)
 	}
 
 	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
-		Shutdown == NoShutdown)
+		StepDown == NoShutdown)
 	{
 		/*
 		 * Start one iteration of the autovacuum daemon, even if autovacuuming
@@ -5299,7 +5386,7 @@ sigusr1_handler(SIGNAL_ARGS)
 	}
 
 	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
-		Shutdown == NoShutdown)
+		StepDown == NoShutdown)
 	{
 		/* The autovacuum launcher wants us to start a worker process. */
 		StartAutovacuumWorker();
@@ -5644,7 +5731,7 @@ MaybeStartWalReceiver(void)
 	if (WalReceiverPID == 0 &&
 		(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
 		 pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
-		Shutdown == NoShutdown)
+		StepDown == NoShutdown)
 	{
 		WalReceiverPID = StartWalReceiver();
 		if (WalReceiverPID != 0)
@@ -6647,3 +6734,18 @@ InitPostmasterDeathWatchHandle(void)
 								 GetLastError())));
 #endif							/* WIN32 */
 }
+
+/*
+ * Check whether a demote request is pending, i.e. whether the demote signal
+ * file can be stat()'ed.  Called by the postmaster on receipt of a stop signal.
+ */
+static bool
+CheckDemoteSignal(void)
+{
+	struct stat stat_buf;
+
+	if (stat(DEMOTE_SIGNAL_FILE, &stat_buf) == 0)
+		return true;
+
+	return false;
+}
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index e73639df74..c144cc35d3 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -57,6 +57,8 @@ dbState(DBState state)
 			return _("shut down");
 		case DB_SHUTDOWNED_IN_RECOVERY:
 			return _("shut down in recovery");
+		case DB_DEMOTING:
+			return _("demoting");
 		case DB_SHUTDOWNING:
 			return _("shutting down");
 		case DB_IN_CRASH_RECOVERY:
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 3c03ace7ed..0bb7d69682 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -62,6 +62,7 @@ typedef enum
 	RESTART_COMMAND,
 	RELOAD_COMMAND,
 	STATUS_COMMAND,
+	DEMOTE_COMMAND,
 	PROMOTE_COMMAND,
 	LOGROTATE_COMMAND,
 	KILL_COMMAND,
@@ -103,6 +104,7 @@ static char version_file[MAXPGPATH];
 static char pid_file[MAXPGPATH];
 static char backup_file[MAXPGPATH];
 static char promote_file[MAXPGPATH];
+static char demote_file[MAXPGPATH];
 static char logrotate_file[MAXPGPATH];
 
 static volatile pgpid_t postmasterPID = -1;
@@ -129,6 +131,7 @@ static void do_stop(void);
 static void do_restart(void);
 static void do_reload(void);
 static void do_status(void);
+static void do_demote(void);
 static void do_promote(void);
 static void do_logrotate(void);
 static void do_kill(pgpid_t pid);
@@ -1029,6 +1032,132 @@ do_stop(void)
 }
 
 
+/*
+ * Ask a running primary server to demote itself to a standby.
+ *
+ * Creates the demote signal file under pg_data, then sends the postmaster the
+ * signal held in "sig".  When do_wait is set, polls the control file until it
+ * reports archive recovery, and gives up with an error after wait_seconds.
+ */
+static void
+do_demote(void)
+{
+	int			cnt;
+	FILE	   *dmtfile;
+	pgpid_t		pid;
+	struct stat statbuf;
+
+	pid = get_pgpid(false);
+
+	if (pid == 0)				/* no pid file */
+	{
+		write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
+		write_stderr(_("Is server running?\n"));
+		exit(1);
+	}
+	else if (pid < 0)			/* standalone backend, not postmaster */
+	{
+		pid = -pid;
+		write_stderr(_("%s: cannot demote server; "
+					   "single-user server is running (PID: %ld)\n"),
+					 progname, pid);
+		exit(1);
+	}
+
+	/*
+	 * A server already in archive recovery has nothing to demote from.  Refuse
+	 * before creating the signal file: the postmaster ignores a demote request
+	 * in that state but still acts on the stop signal sent along with it, and
+	 * the wait loop below would report success right away.
+	 */
+	if (get_control_dbstate() == DB_IN_ARCHIVE_RECOVERY)
+	{
+		write_stderr(_("%s: cannot demote server; "
+					   "server is already in standby mode\n"),
+					 progname);
+		exit(1);
+	}
+
+	snprintf(demote_file, MAXPGPATH, "%s/demote", pg_data);
+
+	if ((dmtfile = fopen(demote_file, "w")) == NULL)
+	{
+		write_stderr(_("%s: could not create demote signal file \"%s\": %s\n"),
+					 progname, demote_file, strerror(errno));
+		exit(1);
+	}
+	if (fclose(dmtfile))
+	{
+		write_stderr(_("%s: could not write demote signal file \"%s\": %s\n"),
+					 progname, demote_file, strerror(errno));
+		exit(1);
+	}
+
+	if (kill((pid_t) pid, sig) != 0)
+	{
+		write_stderr(_("%s: could not send demote signal (PID: %ld): %s\n"),
+					 progname, pid, strerror(errno));
+
+		/*
+		 * Don't leave the signal file behind, or a later plain stop request
+		 * would be taken for a demote.
+		 */
+		if (unlink(demote_file) != 0)
+			write_stderr(_("%s: could not remove demote signal file \"%s\": %s\n"),
+						 progname, demote_file, strerror(errno));
+		exit(1);
+	}
+
+	if (!do_wait)
+	{
+		print_msg(_("server demoting\n"));
+		return;
+	}
+	else
+	{
+		/*
+		 * If backup_label exists, an online backup is running. Warn the user
+		 * that smart demote will wait for it to finish. However, if the
+		 * server is in archive recovery, we're recovering from an online
+		 * backup instead of performing one.
+		 */
+		if (shutdown_mode == SMART_MODE &&
+			stat(backup_file, &statbuf) == 0 &&
+			get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY)
+		{
+			print_msg(_("WARNING: online backup mode is active\n"
+						"Demote will not complete until pg_stop_backup() is called.\n\n"));
+		}
+
+		print_msg(_("waiting for server to demote..."));
+
+		for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++)
+		{
+			if (get_control_dbstate() == DB_IN_ARCHIVE_RECOVERY)
+				break;
+
+			if (cnt % WAITS_PER_SEC == 0)
+				print_msg(".");
+			pg_usleep(USEC_PER_SEC / WAITS_PER_SEC);
+		}
+
+		if (get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY)
+		{
+			print_msg(_(" failed\n"));
+
+			write_stderr(_("%s: server does not demote\n"), progname);
+			if (shutdown_mode == SMART_MODE)
+				write_stderr(_("HINT: The \"-m fast\" option immediately disconnects sessions rather than\n"
+							   "waiting for session-initiated disconnection.\n"));
+			exit(1);
+		}
+		print_msg(_(" done\n"));
+
+		print_msg(_("server demoted\n"));
+	}
+}
+
+
 /*
  *	restart/reload routines
  */
@@ -2452,6 +2552,8 @@ main(int argc, char **argv)
 				ctl_command = RELOAD_COMMAND;
 			else if (strcmp(argv[optind], "status") == 0)
 				ctl_command = STATUS_COMMAND;
+			else if (strcmp(argv[optind], "demote") == 0)
+				ctl_command = DEMOTE_COMMAND;
 			else if (strcmp(argv[optind], "promote") == 0)
 				ctl_command = PROMOTE_COMMAND;
 			else if (strcmp(argv[optind], "logrotate") == 0)
@@ -2559,6 +2661,9 @@ main(int argc, char **argv)
 		case RELOAD_COMMAND:
 			do_reload();
 			break;
+		case DEMOTE_COMMAND:
+			do_demote();
+			break;
 		case PROMOTE_COMMAND:
 			do_promote();
 			break;
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index de5670e538..f529f8c7bd 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -87,6 +87,7 @@ typedef enum DBState
 	DB_STARTUP = 0,
 	DB_SHUTDOWNED,
 	DB_SHUTDOWNED_IN_RECOVERY,
+	DB_DEMOTING,
 	DB_SHUTDOWNING,
 	DB_IN_CRASH_RECOVERY,
 	DB_IN_ARCHIVE_RECOVERY,
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index 179ebaa104..a9e27f009e 100644
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -70,7 +70,12 @@ typedef struct
 
 typedef enum CAC_state
 {
-	CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY,
+	CAC_OK,
+	CAC_STARTUP,
+	CAC_DEMOTE,					/* demote is pending */
+	CAC_SHUTDOWN,				/* shutdown is pending */
+	CAC_RECOVERY,
+	CAC_TOOMANY,
 	CAC_WAITBACKUP
 } CAC_state;
 
diff --git a/src/include/utils/pidfile.h b/src/include/utils/pidfile.h
index 63fefe5c4c..f761d2c4ef 100644
--- a/src/include/utils/pidfile.h
+++ b/src/include/utils/pidfile.h
@@ -50,6 +50,7 @@
  */
 #define PM_STATUS_STARTING		"starting"	/* still starting up */
 #define PM_STATUS_STOPPING		"stopping"	/* in shutdown sequence */
+#define PM_STATUS_DEMOTING		"demoting"	/* demote sequence */
 #define PM_STATUS_READY			"ready   "	/* ready for connections */
 #define PM_STATUS_STANDBY		"standby "	/* up, won't accept connections */
 
-- 
2.20.1

