Hot Standby and deadlock detection

Started by Simon Riggsalmost 16 years ago6 messages
#1Simon Riggs
simon@2ndQuadrant.com
1 attachment(s)

Greg Stark has requested that I re-add max_standby_delay = -1.
I deferred that in favour of relation-specific conflict resolution,
though that seems too major a change from comments received.

As discussed in various other posts, in order to re-add the -1 option we
need to add deadlock detection. I woke up today with a simplifying
assumption and have worked out a solution, the easy parts of which I
have committed earlier.

Part #2 is to make Startup process do deadlock detection. I attach a WIP
patch for comments since signal handling has been a much-discussed area
in recent weeks.

Normal deadlock detection waits for deadlock_timeout before doing the
detection. That is a simple performance tuning mechanism which I think
is probably unnecessary with hot standby, at least in the first
instance.

The way this would work is if Startup waits on a buffer pin we
immediately send out a request to all backends to cancel themselves if
they are holding the buffer pin required && waiting on a lock. We then
sleep until max_standby_delay. When max_standby_delay = -1 we only sleep
until deadlock timeout and then check (on the Startup process).

That keeps the signal handler code simple and reduces the number of test
cases required to confirm everything is solid.

This patch and the last commit together present everything we need to
reenable max_standby_delay = -1, so that change is included here also.

?

--
Simon Riggs www.2ndQuadrant.com

Attachments:

startup_deadlock_check.patchtext/x-patch; charset=UTF-8; name=startup_deadlock_check.patchDownload
*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 272,277 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 272,280 ----
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
  
+ 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
+ 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 127,132 **** WaitExceedsMaxStandbyDelay(void)
--- 127,135 ----
  	long	delay_secs;
  	int		delay_usecs;
  
+ 	if (MaxStandbyDelay == -1)
+ 		return false;
+ 
  	/* Are we past max_standby_delay? */
  	TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
  						&delay_secs, &delay_usecs);
***************
*** 372,378 **** ResolveRecoveryConflictWithBufferPin(void)
  	 * Signal immediately or set alarm for later.
  	 */
  	if (MaxStandbyDelay == 0)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		TimestampTz now;
--- 375,391 ----
  	 * Signal immediately or set alarm for later.
  	 */
  	if (MaxStandbyDelay == 0)
! 		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
! 	else if (MaxStandbyDelay == -1)
! 	{
! 		/*
! 		 * Set the deadlock timer only, we don't timeout in any other case
! 		 */
! 		if (enable_standby_deadlock_alarm())
! 			sig_alarm_enabled = true;
! 		else
! 			elog(FATAL, "could not set timer for process wakeup");
! 	}
  	else
  	{
  		TimestampTz now;
***************
*** 386,392 **** ResolveRecoveryConflictWithBufferPin(void)
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 			SendRecoveryConflictWithBufferPin();
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
--- 399,405 ----
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
***************
*** 394,399 **** ResolveRecoveryConflictWithBufferPin(void)
--- 407,419 ----
  			int		timer_delay_usecs = 0;
  
  			/*
+ 			 * Send out a request to check for buffer pin deadlocks before we wait.
+ 			 * This is fairly cheap, so no need to wait for deadlock timeout before
+ 			 * trying to send it out.
+ 			 */
+ 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
+ 			/*
  			 * How much longer we should wait?
  			 */
  			timer_delay_secs = MaxStandbyDelay - standby_delay_secs;
***************
*** 435,449 **** ResolveRecoveryConflictWithBufferPin(void)
  }
  
  void
! SendRecoveryConflictWithBufferPin(void)
  {
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
  }
  
  /*
--- 455,472 ----
  }
  
  void
! SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
  {
+ 	Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
+ 		   reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, reason, false);
  }
  
  /*
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
***************
*** 45,50 ****
--- 45,51 ----
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/spin.h"
  
  
***************
*** 77,82 **** static volatile bool standby_timeout_active = false;
--- 78,84 ----
  static volatile bool statement_timeout_active = false;
  static volatile bool deadlock_timeout_active = false;
  static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
+ static volatile ProcSignalReason action_at_standby_timeout;
  volatile bool cancel_from_timeout = false;
  
  /* timeout_start_time is set when log_lock_waits is true */
***************
*** 556,561 **** HaveNFreeProcs(int n)
--- 558,572 ----
  	return (n <= 0);
  }
  
+ bool
+ IsWaitingForLock(void)
+ {
+ 	if (lockAwaited == NULL)
+ 		return false;
+ 
+ 	return true;
+ }
+ 
  /*
   * Cancel any pending wait for lock, when aborting a transaction.
   *
***************
*** 1618,1624 **** enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time)
  	Assert(delay_s >= 0 && delay_us >= 0);
  
  	statement_fin_time = fin_time;
! 
  	standby_timeout_active = true;
  
  	MemSet(&timeval, 0, sizeof(struct itimerval));
--- 1629,1635 ----
  	Assert(delay_s >= 0 && delay_us >= 0);
  
  	statement_fin_time = fin_time;
! 	action_at_standby_timeout = PROCSIG_RECOVERY_CONFLICT_BUFFERPIN;
  	standby_timeout_active = true;
  
  	MemSet(&timeval, 0, sizeof(struct itimerval));
***************
*** 1630,1635 **** enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time)
--- 1641,1664 ----
  }
  
  bool
+ enable_standby_deadlock_alarm(void)
+ {
+ 	struct itimerval timeval;
+ 
+ 	statement_fin_time = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
+ 													(DeadlockTimeout * 1000));
+ 	action_at_standby_timeout = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK;
+ 	standby_timeout_active = true;
+ 
+ 	MemSet(&timeval, 0, sizeof(struct itimerval));
+ 	timeval.it_value.tv_sec = DeadlockTimeout;
+ 	timeval.it_value.tv_usec = 0;
+ 	if (setitimer(ITIMER_REAL, &timeval, NULL))
+ 		return false;
+ 	return true;
+ }
+ 
+ bool
  disable_standby_sig_alarm(void)
  {
  	/*
***************
*** 1671,1677 **** CheckStandbyTimeout(void)
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
--- 1700,1706 ----
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin(action_at_standby_timeout);
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2278,2283 **** errdetail_recovery_conflict(void)
--- 2278,2286 ----
  		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
  				errdetail("User query might have needed to see row versions that must be removed.");
  				break;
+ 		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 				errdetail("User transaction caused buffer deadlock with recovery.");
+ 				break;
  		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
  				errdetail("User was connected to a database that must be dropped.");
  				break;
***************
*** 2754,2759 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2757,2771 ----
  		RecoveryConflictReason = reason;
  		switch (reason)
  		{
+ 			case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 					/*
+ 					 * If we aren't waiting for a lock we can never deadlock.
+ 					 */
+ 					if (!IsWaitingForLock())
+ 						return;
+ 
+ 					/* Intentional drop through to check wait for pin */
+ 
  			case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
  					/*
  					 * If we aren't blocking the Startup process there is
***************
*** 2819,2824 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2831,2838 ----
  					elog(FATAL, "Unknown conflict mode");
  		}
  
+ 		Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
+ 
  		/*
  		 * If it's safe to interrupt, and we're waiting for input or a lock,
  		 * service the interrupt immediately
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 1392,1398 **** static struct config_int ConfigureNamesInt[] =
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, 0, INT_MAX, NULL, NULL
  	},
  
  	{
--- 1392,1398 ----
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, -1, INT_MAX, NULL, NULL
  	},
  
  	{
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 189,194 **** extern void ProcQueueInit(PROC_QUEUE *queue);
--- 189,195 ----
  extern int	ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
  extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
  extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
+ extern bool IsWaitingForLock(void);
  extern void LockWaitCancel(void);
  
  extern void ProcWaitForSignal(void);
***************
*** 199,204 **** extern bool disable_sig_alarm(bool is_statement_timeout);
--- 200,206 ----
  extern void handle_sig_alarm(SIGNAL_ARGS);
  
  extern bool enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time);
+ extern bool enable_standby_deadlock_alarm(void);
  extern bool disable_standby_sig_alarm(void);
  extern void handle_standby_sig_alarm(SIGNAL_ARGS);
  
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 38,43 **** typedef enum
--- 38,44 ----
  	PROCSIG_RECOVERY_CONFLICT_LOCK,
  	PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
  	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
  
  	NUM_PROCSIGNALS				/* Must be last! */
  } ProcSignalReason;
*** a/src/include/storage/standby.h
--- b/src/include/storage/standby.h
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/xlog.h"
  #include "storage/lock.h"
+ #include "storage/procsignal.h"
  #include "storage/relfilenode.h"
  
  extern int	vacuum_defer_cleanup_age;
***************
*** 30,36 **** extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(void);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*
--- 31,37 ----
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*
#2Heikki Linnakangas
heikki.linnakangas@enterprisedb.com
In reply to: Simon Riggs (#1)
Re: Hot Standby and deadlock detection

Simon Riggs wrote:

The way this would work is if Startup waits on a buffer pin we
immediately send out a request to all backends to cancel themselves if
they are holding the buffer pin required && waiting on a lock. We then
sleep until max_standby_delay. When max_standby_delay = -1 we only sleep
until deadlock timeout and then check (on the Startup process).

Should wake up to check for deadlocks after deadlock_timeout also when
max_standby_delay > deadlock_timeout. max_standby_delay could be hours -
we want to detect a deadlock sooner than that.

Generally speaking, max_standby_delay==-1 codepath shouldn't be any
different from the max_standby_delay>0 codepath.

--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com

#3Simon Riggs
simon@2ndQuadrant.com
In reply to: Heikki Linnakangas (#2)
Re: Hot Standby and deadlock detection

On Mon, 2010-02-01 at 09:40 +0200, Heikki Linnakangas wrote:

Simon Riggs wrote:

The way this would work is if Startup waits on a buffer pin we
immediately send out a request to all backends to cancel themselves if
they are holding the buffer pin required && waiting on a lock. We then
sleep until max_standby_delay. When max_standby_delay = -1 we only sleep
until deadlock timeout and then check (on the Startup process).

Should wake up to check for deadlocks after deadlock_timeout also when
max_standby_delay > deadlock_timeout. max_standby_delay could be hours -
we want to detect a deadlock sooner than that.

The patch does detect deadlocks sooner that that - "immediately", as
described above.

The simplified logic is

if (MaxStandbyDelay == 0)
immediate time out any buffer pin holders
else if (MaxStandbyDelay == -1)
wait for deadlock_timeout then check for deadlockers
else if (standby_delay > MaxStandbyDelay)
immediate time out on buffer pin
else
{
immediate(*) check for deadlockers
wait for remainder of time then time out any buffer pin holders
}

(*) Doing it this way makes the logic sigalarm handler code easier/more
bug free. The only difference is a potential performance gain from not
running deadlock detection early.

--
Simon Riggs www.2ndQuadrant.com

#4Heikki Linnakangas
heikki.linnakangas@enterprisedb.com
In reply to: Simon Riggs (#3)
Re: Hot Standby and deadlock detection

Simon Riggs wrote:

On Mon, 2010-02-01 at 09:40 +0200, Heikki Linnakangas wrote:

Simon Riggs wrote:

The way this would work is if Startup waits on a buffer pin we
immediately send out a request to all backends to cancel themselves if
they are holding the buffer pin required && waiting on a lock. We then
sleep until max_standby_delay. When max_standby_delay = -1 we only sleep
until deadlock timeout and then check (on the Startup process).

Should wake up to check for deadlocks after deadlock_timeout also when
max_standby_delay > deadlock_timeout. max_standby_delay could be hours -
we want to detect a deadlock sooner than that.

The patch does detect deadlocks sooner that that - "immediately", as
described above.

Umm, so why not run the deadlock check immediately in
max_standby_delay=-1 case as well? Why is that case handled differently
from max_standby_delay>0 case?

--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com

#5Simon Riggs
simon@2ndQuadrant.com
In reply to: Heikki Linnakangas (#4)
Re: Hot Standby and deadlock detection

On Mon, 2010-02-01 at 17:50 +0200, Heikki Linnakangas wrote:

Simon Riggs wrote:

On Mon, 2010-02-01 at 09:40 +0200, Heikki Linnakangas wrote:

Simon Riggs wrote:

The way this would work is if Startup waits on a buffer pin we
immediately send out a request to all backends to cancel themselves if
they are holding the buffer pin required && waiting on a lock. We then
sleep until max_standby_delay. When max_standby_delay = -1 we only sleep
until deadlock timeout and then check (on the Startup process).

Should wake up to check for deadlocks after deadlock_timeout also when
max_standby_delay > deadlock_timeout. max_standby_delay could be hours -
we want to detect a deadlock sooner than that.

The patch does detect deadlocks sooner that that - "immediately", as
described above.

Umm, so why not run the deadlock check immediately in
max_standby_delay=-1 case as well? Why is that case handled differently
from max_standby_delay>0 case?

Cos the code to do that is easy.

I'll do the deadlock check immediately and make it even easier.

--
Simon Riggs www.2ndQuadrant.com

#6Simon Riggs
simon@2ndQuadrant.com
In reply to: Heikki Linnakangas (#4)
1 attachment(s)
Re: Hot Standby and deadlock detection

On Mon, 2010-02-01 at 17:50 +0200, Heikki Linnakangas wrote:

Umm, so why not run the deadlock check immediately in
max_standby_delay=-1 case as well? Why is that case handled differently
from max_standby_delay>0 case?

Done, tested, working.

Will commit tomorrow if no further questions or comments.

--
Simon Riggs www.2ndQuadrant.com

Attachments:

startup_deadlock_check.v2.patchtext/x-patch; charset=UTF-8; name=startup_deadlock_check.v2.patchDownload
*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 272,277 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 272,280 ----
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
  
+ 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
+ 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 127,132 **** WaitExceedsMaxStandbyDelay(void)
--- 127,135 ----
  	long	delay_secs;
  	int		delay_usecs;
  
+ 	if (MaxStandbyDelay == -1)
+ 		return false;
+ 
  	/* Are we past max_standby_delay? */
  	TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
  						&delay_secs, &delay_usecs);
***************
*** 351,365 **** ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
   * they hold one of the buffer pins that is blocking Startup process. If so,
   * backends will take an appropriate error action, ERROR or FATAL.
   *
!  * A secondary purpose of this is to avoid deadlocks that might occur between
!  * the Startup process and lock waiters. Deadlocks occur because if queries
   * wait on a lock, that must be behind an AccessExclusiveLock, which can only
!  * be clared if the Startup process replays a transaction completion record.
!  * If Startup process is waiting then that is a deadlock. If we allowed a
!  * setting of max_standby_delay that meant "wait forever" we would then need
!  * special code to protect against deadlock. Such deadlocks are rare, so the
!  * code would be almost certainly buggy, so we avoid both long waits and
!  * deadlocks using the same mechanism.
   */
  void
  ResolveRecoveryConflictWithBufferPin(void)
--- 354,368 ----
   * they hold one of the buffer pins that is blocking Startup process. If so,
   * backends will take an appropriate error action, ERROR or FATAL.
   *
!  * We also check for deadlocks before we wait, though applications that cause
!  * these will be extremely rare.  Deadlocks occur because if queries
   * wait on a lock, that must be behind an AccessExclusiveLock, which can only
!  * be cleared if the Startup process replays a transaction completion record.
!  * If Startup process is also waiting then that is a deadlock. The deadlock
!  * can occur if the query is waiting and then the Startup sleeps, or if
!  * Startup is sleeping and the the query waits on a lock. We protect against
!  * only the former sequence here, the latter sequence is checked prior to
!  * the query sleeping, in CheckRecoveryConflictDeadlock().
   */
  void
  ResolveRecoveryConflictWithBufferPin(void)
***************
*** 368,378 **** ResolveRecoveryConflictWithBufferPin(void)
  
  	Assert(InHotStandby);
  
- 	/*
- 	 * Signal immediately or set alarm for later.
- 	 */
  	if (MaxStandbyDelay == 0)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		TimestampTz now;
--- 371,393 ----
  
  	Assert(InHotStandby);
  
  	if (MaxStandbyDelay == 0)
! 	{
! 		/*
! 		 * We don't want to wait, so just tell everybody holding the pin to 
! 		 * get out of town.
! 		 */
! 		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
! 	}
! 	else if (MaxStandbyDelay == -1)
! 	{
! 		/*
! 		 * Send out a request to check for buffer pin deadlocks before we wait.
! 		 * This is fairly cheap, so no need to wait for deadlock timeout before
! 		 * trying to send it out.
! 		 */
! 		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
! 	}
  	else
  	{
  		TimestampTz now;
***************
*** 386,392 **** ResolveRecoveryConflictWithBufferPin(void)
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 			SendRecoveryConflictWithBufferPin();
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
--- 401,412 ----
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 		{
! 			/*
! 			 * We're already behind, so clear a path as quickly as possible.
! 			 */
! 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
! 		}
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
***************
*** 394,399 **** ResolveRecoveryConflictWithBufferPin(void)
--- 414,426 ----
  			int		timer_delay_usecs = 0;
  
  			/*
+ 			 * Send out a request to check for buffer pin deadlocks before we wait.
+ 			 * This is fairly cheap, so no need to wait for deadlock timeout before
+ 			 * trying to send it out.
+ 			 */
+ 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
+ 			/*
  			 * How much longer we should wait?
  			 */
  			timer_delay_secs = MaxStandbyDelay - standby_delay_secs;
***************
*** 435,449 **** ResolveRecoveryConflictWithBufferPin(void)
  }
  
  void
! SendRecoveryConflictWithBufferPin(void)
  {
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
  }
  
  /*
--- 462,479 ----
  }
  
  void
! SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
  {
+ 	Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
+ 		   reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, reason, false);
  }
  
  /*
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
***************
*** 45,50 ****
--- 45,51 ----
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/spin.h"
  
  
***************
*** 556,561 **** HaveNFreeProcs(int n)
--- 557,571 ----
  	return (n <= 0);
  }
  
+ bool
+ IsWaitingForLock(void)
+ {
+ 	if (lockAwaited == NULL)
+ 		return false;
+ 
+ 	return true;
+ }
+ 
  /*
   * Cancel any pending wait for lock, when aborting a transaction.
   *
***************
*** 1671,1677 **** CheckStandbyTimeout(void)
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
--- 1681,1687 ----
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2278,2283 **** errdetail_recovery_conflict(void)
--- 2278,2286 ----
  		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
  				errdetail("User query might have needed to see row versions that must be removed.");
  				break;
+ 		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 				errdetail("User transaction caused buffer deadlock with recovery.");
+ 				break;
  		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
  				errdetail("User was connected to a database that must be dropped.");
  				break;
***************
*** 2754,2759 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2757,2771 ----
  		RecoveryConflictReason = reason;
  		switch (reason)
  		{
+ 			case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 					/*
+ 					 * If we aren't waiting for a lock we can never deadlock.
+ 					 */
+ 					if (!IsWaitingForLock())
+ 						return;
+ 
+ 					/* Intentional drop through to check wait for pin */
+ 
  			case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
  					/*
  					 * If we aren't blocking the Startup process there is
***************
*** 2819,2824 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2831,2838 ----
  					elog(FATAL, "Unknown conflict mode");
  		}
  
+ 		Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
+ 
  		/*
  		 * If it's safe to interrupt, and we're waiting for input or a lock,
  		 * service the interrupt immediately
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 1381,1387 **** static struct config_int ConfigureNamesInt[] =
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, 0, INT_MAX, NULL, NULL
  	},
  
  	{
--- 1381,1387 ----
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, -1, INT_MAX, NULL, NULL
  	},
  
  	{
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 189,194 **** extern void ProcQueueInit(PROC_QUEUE *queue);
--- 189,195 ----
  extern int	ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
  extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
  extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
+ extern bool IsWaitingForLock(void);
  extern void LockWaitCancel(void);
  
  extern void ProcWaitForSignal(void);
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 38,43 **** typedef enum
--- 38,44 ----
  	PROCSIG_RECOVERY_CONFLICT_LOCK,
  	PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
  	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
  
  	NUM_PROCSIGNALS				/* Must be last! */
  } ProcSignalReason;
*** a/src/include/storage/standby.h
--- b/src/include/storage/standby.h
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/xlog.h"
  #include "storage/lock.h"
+ #include "storage/procsignal.h"
  #include "storage/relfilenode.h"
  
  extern int	vacuum_defer_cleanup_age;
***************
*** 30,36 **** extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(void);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*
--- 31,37 ----
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*