*** a/src/backend/postmaster/postmaster.c
--- b/src/backend/postmaster/postmaster.c
***************
*** 225,235 **** static pid_t StartupPID = 0,
--- 225,257 ----
  static int	Shutdown = NoShutdown;
  
  static bool FatalError = false; /* T if recovering from backend crash */
+ static bool RecoveryError = false; /* T if recovery failed */
  
  /*
   * We use a simple state machine to control startup, shutdown, and
   * crash recovery (which is rather like shutdown followed by startup).
   *
+  * After doing all the postmaster initialization work, we enter PM_STARTUP
+  * state and the startup process is launched. The startup process begins by
+  * reading the control file and other preliminary initialization steps. When
+  * it's ready to start WAL redo, it signals postmaster, and we switch to
+  * PM_RECOVERY phase. The background writer is launched, while the startup
+  * process continues applying WAL.
+  *
+  * After reaching a consistent point in WAL redo, the startup process signals
+  * us again, and we switch to PM_RECOVERY_CONSISTENT phase. There's currently
+  * no difference between PM_RECOVERY and PM_RECOVERY_CONSISTENT, but we
+  * could start accepting connections to perform read-only queries at this
+  * point, if we had the infrastructure to do that.
+  *
+  * When the WAL redo is finished, the startup process signals us the third
+  * time, and exits. We don't process the third signal immediately; rather,
+  * when we see that the startup process has exited, we check that we have
+  * received the signal. If everything is OK, we then switch to PM_RUN state.
+  * The startup process can also skip the recovery and consistent recovery
+  * phases altogether, as it will during normal startup when there's no
+  * recovery to be done, for example.
+  *
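+  * Viewed from the startup process's side, the handshake is roughly the
+  * following (a sketch only; the sending half lives in xlog.c, not in this
+  * file, and presumably goes through SendPostmasterSignal()):
+  *
+  *	PMSIGNAL_RECOVERY_STARTED	-> postmaster enters PM_RECOVERY
+  *	PMSIGNAL_RECOVERY_CONSISTENT	-> postmaster enters PM_RECOVERY_CONSISTENT
+  *	PMSIGNAL_RECOVERY_COMPLETED	-> remembered, checked in reaper()
+  *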
   * Normal child backends can only be launched when we are in PM_RUN state.
   * (We also allow it in PM_WAIT_BACKUP state, but only for superusers.)
   * In other states we handle connection requests by launching "dead_end"
***************
*** 245,259 **** static bool FatalError = false; /* T if recovering from backend crash */
   *
   * Notice that this state variable does not distinguish *why* we entered
   * states later than PM_RUN --- Shutdown and FatalError must be consulted
!  * to find that out.  FatalError is never true in PM_RUN state, nor in
!  * PM_SHUTDOWN states (because we don't enter those states when trying to
!  * recover from a crash).  It can be true in PM_STARTUP state, because we
!  * don't clear it until we've successfully recovered.
   */
  typedef enum
  {
  	PM_INIT,					/* postmaster starting */
  	PM_STARTUP,					/* waiting for startup subprocess */
  	PM_RUN,						/* normal "database is alive" state */
  	PM_WAIT_BACKUP,				/* waiting for online backup mode to end */
  	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
--- 267,285 ----
   *
   * Notice that this state variable does not distinguish *why* we entered
   * states later than PM_RUN --- Shutdown and FatalError must be consulted
!  * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
!  * states, nor in PM_SHUTDOWN states (because we don't enter those states
!  * when trying to recover from a crash).  It can be true in PM_STARTUP state,
!  * because we don't clear it until we've successfully started WAL redo.
!  * Similarly, RecoveryError means that we have crashed during recovery, and
!  * should not try to restart.
   */
  typedef enum
  {
  	PM_INIT,					/* postmaster starting */
  	PM_STARTUP,					/* waiting for startup subprocess */
+ 	PM_RECOVERY,				/* in recovery mode */
+ 	PM_RECOVERY_CONSISTENT,		/* consistent recovery mode */
  	PM_RUN,						/* normal "database is alive" state */
  	PM_WAIT_BACKUP,				/* waiting for online backup mode to end */
  	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
***************
*** 1302,1308 **** ServerLoop(void)
  		 * state that prevents it, start one.  It doesn't matter if this
  		 * fails, we'll just try again later.
  		 */
! 		if (BgWriterPID == 0 && pmState == PM_RUN)
  			BgWriterPID = StartBackgroundWriter();
  
  		/*
--- 1328,1336 ----
  		 * state that prevents it, start one.  It doesn't matter if this
  		 * fails, we'll just try again later.
  		 */
! 		if (BgWriterPID == 0 &&
! 			(pmState == PM_RUN || pmState == PM_RECOVERY ||
! 			 pmState == PM_RECOVERY_CONSISTENT))
  			BgWriterPID = StartBackgroundWriter();
  
  		/*
***************
*** 1752,1758 **** canAcceptConnections(void)
  			return CAC_WAITBACKUP;	/* allow superusers only */
  		if (Shutdown > NoShutdown)
  			return CAC_SHUTDOWN;	/* shutdown is pending */
! 		if (pmState == PM_STARTUP && !FatalError)
  			return CAC_STARTUP; /* normal startup */
  		return CAC_RECOVERY;	/* else must be crash recovery */
  	}
--- 1780,1789 ----
  			return CAC_WAITBACKUP;	/* allow superusers only */
  		if (Shutdown > NoShutdown)
  			return CAC_SHUTDOWN;	/* shutdown is pending */
! 		if (!FatalError &&
! 			(pmState == PM_STARTUP ||
! 			 pmState == PM_RECOVERY ||
! 			 pmState == PM_RECOVERY_CONSISTENT))
  			return CAC_STARTUP; /* normal startup */
  		return CAC_RECOVERY;	/* else must be crash recovery */
  	}
***************
*** 1982,1988 **** pmdie(SIGNAL_ARGS)
  			ereport(LOG,
  					(errmsg("received smart shutdown request")));
  
! 			if (pmState == PM_RUN)
  			{
  				/* autovacuum workers are told to shut down immediately */
  				SignalAutovacWorkers(SIGTERM);
--- 2013,2020 ----
  			ereport(LOG,
  					(errmsg("received smart shutdown request")));
  
! 			if (pmState == PM_RUN || pmState == PM_RECOVERY ||
! 				pmState == PM_RECOVERY_CONSISTENT)
  			{
  				/* autovacuum workers are told to shut down immediately */
  				SignalAutovacWorkers(SIGTERM);
***************
*** 2019,2025 **** pmdie(SIGNAL_ARGS)
  
  			if (StartupPID != 0)
  				signal_child(StartupPID, SIGTERM);
! 			if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP)
  			{
  				ereport(LOG,
  						(errmsg("aborting any active transactions")));
--- 2051,2064 ----
  
  			if (StartupPID != 0)
  				signal_child(StartupPID, SIGTERM);
! 			if (pmState == PM_RECOVERY)
! 			{
! 				/* only bgwriter is active in this state */
! 				pmState = PM_WAIT_BACKENDS;
! 			}
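! 			/*
! 			 * Deliberately not "else if": once we've switched to
! 			 * PM_WAIT_BACKENDS above, the test below can no longer match,
! 			 * so no "aborting any active transactions" message is emitted
! 			 * when only the bgwriter was running.
! 			 */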
! 			if (pmState == PM_RUN ||
! 				pmState == PM_WAIT_BACKUP ||
! 				pmState == PM_RECOVERY_CONSISTENT)
  			{
  				ereport(LOG,
  						(errmsg("aborting any active transactions")));
***************
*** 2115,2125 **** reaper(SIGNAL_ARGS)
  		 */
  		if (pid == StartupPID)
  		{
  			StartupPID = 0;
- 			Assert(pmState == PM_STARTUP);
  
! 			/* FATAL exit of startup is treated as catastrophic */
! 			if (!EXIT_STATUS_0(exitstatus))
  			{
  				LogChildExit(LOG, _("startup process"),
  							 pid, exitstatus);
--- 2154,2177 ----
  		 */
  		if (pid == StartupPID)
  		{
+ 			bool recoveryCompleted;
+ 
  			StartupPID = 0;
  
! 			/*
! 			 * Check if the startup process completed recovery before exiting
! 			 */
! 			if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED))
! 				recoveryCompleted = true;
! 			else
! 				recoveryCompleted = false;
! 
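! 			/*
! 			 * One of five things happens next, in this order: we abort
! 			 * because startup died before redo even began; we treat any
! 			 * other unexpected exit status as a crash; we proceed with a
! 			 * pending shutdown; we treat a clean exit that didn't finish
! 			 * recovery as a crash too; or we commence normal operations.
! 			 */
! 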
! 			/*
! 			 * Unexpected exit of startup process (including FATAL exit)
! 			 * during PM_STARTUP is treated as catastrophic. There are no
! 			 * other processes running yet, so we can just exit.
! 			 */
! 			if (pmState == PM_STARTUP && !recoveryCompleted)
  			{
  				LogChildExit(LOG, _("startup process"),
  							 pid, exitstatus);
***************
*** 2127,2141 **** reaper(SIGNAL_ARGS)
  				(errmsg("aborting startup due to startup process failure")));
  				ExitPostmaster(1);
  			}
- 
  			/*
! 			 * Startup succeeded - we are done with system startup or
! 			 * recovery.
  			 */
! 			FatalError = false;
! 
  			/*
! 			 * Go to shutdown mode if a shutdown request was pending.
  			 */
  			if (Shutdown > NoShutdown)
  			{
--- 2179,2199 ----
  				(errmsg("aborting startup due to startup process failure")));
  				ExitPostmaster(1);
  			}
  			/*
! 			 * Any unexpected exit (including FATAL exit) of the startup
! 			 * process is treated as a crash, except that we don't want
! 			 * to reinitialize.
  			 */
! 			if (!EXIT_STATUS_0(exitstatus))
! 			{
! 				RecoveryError = true;
! 				HandleChildCrash(pid, exitstatus,
! 								 _("startup process"));
! 				continue;
! 			}
  			/*
! 			 * Startup process exited in response to a shutdown request (or
! 			 * it finished normally regardless of the shutdown request).
  			 */
  			if (Shutdown > NoShutdown)
  			{
***************
*** 2143,2151 **** reaper(SIGNAL_ARGS)
  				/* PostmasterStateMachine logic does the rest */
  				continue;
  			}
  
  			/*
! 			 * Otherwise, commence normal operations.
  			 */
  			pmState = PM_RUN;
  
--- 2201,2221 ----
  				/* PostmasterStateMachine logic does the rest */
  				continue;
  			}
+ 			/*
+ 			 * Startup process exited normally, but didn't finish recovery.
+ 			 * This can happen if someone other than the postmaster kills the
+ 			 * startup process with SIGTERM. Treat it like a crash.
+ 			 */
+ 			if (!recoveryCompleted)
+ 			{
+ 				RecoveryError = true;
+ 				HandleChildCrash(pid, exitstatus,
+ 								 _("startup process"));
+ 				continue;
+ 			}
  
  			/*
! 			 * Startup succeeded, commence normal operations
  			 */
  			pmState = PM_RUN;
  
***************
*** 2157,2167 **** reaper(SIGNAL_ARGS)
  			load_role();
  
  			/*
! 			 * Crank up the background writer.	It doesn't matter if this
! 			 * fails, we'll just try again later.
  			 */
! 			Assert(BgWriterPID == 0);
! 			BgWriterPID = StartBackgroundWriter();
  
  			/*
  			 * Likewise, start other special children as needed.  In a restart
--- 2227,2238 ----
  			load_role();
  
  			/*
! 			 * Crank up the background writer, if we didn't do that already
! 			 * when we entered the recovery phase.  It doesn't matter if this
! 			 * fails, we'll just try again later.
  			 */
! 			if (BgWriterPID == 0)
! 				BgWriterPID = StartBackgroundWriter();
  
  			/*
  			 * Likewise, start other special children as needed.  In a restart
***************
*** 2178,2186 **** reaper(SIGNAL_ARGS)
  
  			/* at this point we are really open for business */
  			ereport(LOG,
! 				 (errmsg("database system is ready to accept connections")));
! 
! 			continue;
  		}
  
  		/*
--- 2249,2255 ----
  
  			/* at this point we are really open for business */
  			ereport(LOG,
! 				(errmsg("database system is ready to accept connections")));
  		}
  
  		/*
***************
*** 2443,2448 **** HandleChildCrash(int pid, int exitstatus, const char *procname)
--- 2512,2529 ----
  		}
  	}
  
+ 	/* Take care of the startup process too */
+ 	if (pid == StartupPID)
+ 		StartupPID = 0;
+ 	else if (StartupPID != 0 && !FatalError)
+ 	{
+ 		ereport(DEBUG2,
+ 				(errmsg_internal("sending %s to process %d",
+ 								 (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ 								 (int) StartupPID)));
+ 		signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
+ 	}
+ 
  	/* Take care of the bgwriter too */
  	if (pid == BgWriterPID)
  		BgWriterPID = 0;
***************
*** 2514,2520 **** HandleChildCrash(int pid, int exitstatus, const char *procname)
  
  	FatalError = true;
  	/* We now transit into a state of waiting for children to die */
! 	if (pmState == PM_RUN ||
  		pmState == PM_WAIT_BACKUP ||
  		pmState == PM_SHUTDOWN)
  		pmState = PM_WAIT_BACKENDS;
--- 2595,2603 ----
  
  	FatalError = true;
  	/* We now transit into a state of waiting for children to die */
! 	if (pmState == PM_RECOVERY ||
! 		pmState == PM_RECOVERY_CONSISTENT ||
! 		pmState == PM_RUN ||
  		pmState == PM_WAIT_BACKUP ||
  		pmState == PM_SHUTDOWN)
  		pmState = PM_WAIT_BACKENDS;
***************
*** 2723,2728 **** PostmasterStateMachine(void)
--- 2806,2820 ----
  	}
  
  	/*
+ 	 * If recovery failed, wait for all non-syslogger children to exit,
+ 	 * and then exit postmaster. We don't try to reinitialize when recovery
+ 	 * fails, because more than likely it will just fail again and we will
+ 	 * keep trying forever.
+ 	 */
+ 	if (RecoveryError && pmState == PM_NO_CHILDREN)
+ 		ExitPostmaster(1);
+ 
+ 	/*
  	 * If we need to recover from a crash, wait for all non-syslogger
  	 * children to exit, then reset shmem and StartupDataBase.
  	 */
***************
*** 3847,3852 **** sigusr1_handler(SIGNAL_ARGS)
--- 3939,3988 ----
  
  	PG_SETMASK(&BlockSig);
  
+ 	/*
+ 	 * RECOVERY_STARTED and RECOVERY_CONSISTENT signals are ignored in
+ 	 * unexpected states. If the startup process quickly starts up, completes
+ 	 * recovery, and exits, we might process the death of the startup process
+ 	 * first. We don't want to go back to recovery in that case.
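+ 	 *
+ 	 * Note the evaluation order below: CheckPostmasterSignal() clears the
+ 	 * signal flag whether or not the pmState test succeeds, so an
+ 	 * out-of-state signal is consumed and dropped rather than left pending.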
+ 	 */
+ 	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
+ 		pmState == PM_STARTUP)
+ 	{
+ 		/* WAL redo has started. We're out of reinitialization. */
+ 		FatalError = false;
+ 
+ 		/*
+ 		 * Crank up the background writer.	It doesn't matter if this
+ 		 * fails, we'll just try again later.
+ 		 */
+ 		Assert(BgWriterPID == 0);
+ 		BgWriterPID = StartBackgroundWriter();
+ 
+ 		pmState = PM_RECOVERY;
+ 	}
+ 	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) &&
+ 		pmState == PM_RECOVERY)
+ 	{
+ 		/*
+ 		 * Load the flat authorization file into postmaster's cache. The
+ 		 * startup process won't have recomputed this from the database yet,
+ 		 * so it may change following recovery.
+ 		 */
+ 		load_role();
+ 
+ 		/*
+ 		 * Likewise, start other special children as needed.
+ 		 */
+ 		Assert(PgStatPID == 0);
+ 		PgStatPID = pgstat_start();
+ 
+ 		/* XXX at this point we could accept read-only connections */
+ 		ereport(DEBUG1,
+ 				(errmsg("database system is in consistent recovery mode")));
+ 
+ 		pmState = PM_RECOVERY_CONSISTENT;
+ 	}
+ 
  	if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
  	{
  		/*
