From f466fe53e8e64cfa49bf56dbdf5920f9ea4e3562 Mon Sep 17 00:00:00 2001
From: Daniel Farina <daniel@heroku.com>
Date: Thu, 15 Mar 2012 20:46:38 -0700
Subject: [PATCH] Implement race-free sql-originated backend
 cancellation/termination

If promising, this patch requires a few documentation updates in
addendum.

Since 0495aaad8b337642830a4d4e82f8b8c02b27b1be, pg_cancel_backend can
be run on backends that have the same role as the backend
pg_cancel_backend is being invoked from.  Since that time, a
documented caveat exists stating that there was a race whereby a
process death-and-recycle could result in an otherwise unrelated
backend receiving SIGINT.

Now it is desirable for pg_terminate_backend -- which also has the
effect of having the backend exit, and closing the socket -- to also
be usable by non-superusers.  Presuming SIGINT was acceptable to race,
it's not clear that it's acceptable for SIGTERM to race in the same
way, so this patch seeks to try to do something about that.

This patch attempts to close this race condition by targeting query
cancellation/termination against the per-backend-startup unique
BackendId to unambiguously identify the session rather than a PID.
This makes the SQL function act more like how cancellation keys work
already (perhaps these paths can be converged somehow).

Signed-off-by: Daniel Farina <daniel@heroku.com>
---
 src/backend/access/transam/twophase.c |    4 +
 src/backend/storage/ipc/procsignal.c  |    3 +
 src/backend/storage/lmgr/proc.c       |    3 +
 src/backend/tcop/postgres.c           |   55 +++++++++++++++++
 src/backend/utils/adt/misc.c          |  104 +++++++++++++++++++-------------
 src/include/miscadmin.h               |    1 +
 src/include/storage/proc.h            |   17 +++++
 src/include/storage/procsignal.h      |    1 +
 8 files changed, 146 insertions(+), 42 deletions(-)

*** a/src/backend/access/transam/twophase.c
--- b/src/backend/access/transam/twophase.c
***************
*** 62,67 ****
--- 62,68 ----
  #include "storage/procarray.h"
  #include "storage/sinvaladt.h"
  #include "storage/smgr.h"
+ #include "storage/spin.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
  #include "utils/timestamp.h"
***************
*** 326,331 **** MarkAsPreparing(TransactionId xid, const char *gid,
--- 327,335 ----
  	proc->backendId = InvalidBackendId;
  	proc->databaseId = databaseid;
  	proc->roleId = owner;
+ 	SpinLockInit(&MyProc->adminMutex);
+ 	proc->adminAction = ADMIN_ACTION_NONE;
+ 	proc->adminBackendId = InvalidBackendId;
  	proc->lwWaiting = false;
  	proc->lwWaitMode = 0;
  	proc->lwWaitLink = NULL;
*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 258,263 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 258,266 ----
  	if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT))
  		HandleNotifyInterrupt();
  
+ 	if (CheckProcSignal(PROCSIG_ADMIN_ACTION_INTERRUPT))
+ 		HandleAdminActionInterrupt();
+ 
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
  
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
***************
*** 356,361 **** InitProcess(void)
--- 356,364 ----
  	MyProc->backendId = InvalidBackendId;
  	MyProc->databaseId = InvalidOid;
  	MyProc->roleId = InvalidOid;
+ 	SpinLockInit(&MyProc->adminMutex);
+ 	MyProc->adminAction = ADMIN_ACTION_NONE;
+ 	MyProc->adminBackendId = InvalidBackendId;
  	MyPgXact->inCommit = false;
  	MyPgXact->vacuumFlags = 0;
  	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 64,69 ****
--- 64,70 ----
  #include "storage/proc.h"
  #include "storage/procsignal.h"
  #include "storage/sinval.h"
+ #include "storage/spin.h"
  #include "tcop/fastpath.h"
  #include "tcop/pquery.h"
  #include "tcop/tcopprot.h"
***************
*** 2783,2788 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2784,2843 ----
  }
  
  /*
+  * HandleAdminActionInterrupt: handle backend administrative actions
+  *
+  * Backend admin actions are secure and unambiguous compared to sending a
+  * signal directly to a process id because in order for the intended function
+  * of the interrupt to be applied a correct BackendId must be applied.
+  */
+ void
+ HandleAdminActionInterrupt(void)
+ {
+ 	int save_errno = errno;
+ 
+ 	if (!proc_exit_inprogress)
+ 	{
+ 		volatile PGPROC	*myVolatileProc = MyProc;
+ 		BEAdminAction	 actionToPerform;
+ 		BackendId		 submittedBackendId;
+ 
+ 		/* Copy out fields that must be coherent in MyProc with haste */
+ 		SpinLockAcquire(&myVolatileProc->adminMutex);
+ 		actionToPerform	   = myVolatileProc->adminAction;
+ 		submittedBackendId = myVolatileProc->adminBackendId;
+ 		SpinLockRelease(&myVolatileProc->adminMutex);
+ 
+ 		/*
+ 		 * Abort if the MyBackendId doesn't match the submitted one, or is
+ 		 * invalid.
+ 		 */
+ 		if (submittedBackendId != MyBackendId ||
+ 			MyBackendId == InvalidBackendId)
+ 			goto finish;
+ 
+ 		/* Perform the administrative action */
+ 		Assert(submittedBackendId == MyBackendId);
+ 		switch (actionToPerform)
+ 		{
+ 			case ADMIN_ACTION_NONE:
+ 				break;
+ 
+ 			case ADMIN_ACTION_CANCEL:
+ 				InterruptPending = true;
+ 				QueryCancelPending = true;
+ 				break;
+ 
+ 			case ADMIN_ACTION_TERMINATE:
+ 				die(SIGTERM);
+ 				break;
+ 		}
+ 	}
+ 
+ finish:
+ 	errno = save_errno;
+ }
+ 
+ /*
   * ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro
   *
   * If an interrupt condition is pending, and it's safe to service it,
*** a/src/backend/utils/adt/misc.c
--- b/src/backend/utils/adt/misc.c
***************
*** 32,37 ****
--- 32,38 ----
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/spin.h"
  #include "utils/lsyscache.h"
  #include "tcop/tcopprot.h"
  #include "utils/builtins.h"
***************
*** 88,147 **** current_query(PG_FUNCTION_ARGS)
  static int
  pg_signal_backend(int pid, int sig)
  {
! 	PGPROC	   *proc;
  
! 	if (!superuser())
  	{
! 		/*
! 		 * Since the user is not superuser, check for matching roles. Trust
! 		 * that BackendPidGetProc will return NULL if the pid isn't valid,
! 		 * even though the check for whether it's a backend process is below.
! 		 * The IsBackendPid check can't be relied on as definitive even if it
! 		 * was first. The process might end between successive checks
! 		 * regardless of their order. There's no way to acquire a lock on an
! 		 * arbitrary process to prevent that. But since so far all the callers
! 		 * of this mechanism involve some request for ending the process
! 		 * anyway, that it might end on its own first is not a problem.
! 		 */
! 		proc = BackendPidGetProc(pid);
  
- 		if (proc == NULL || proc->roleId != GetUserId())
- 			return SIGNAL_BACKEND_NOPERMISSION;
- 	}
- 
- 	if (!IsBackendPid(pid))
- 	{
  		/*
! 		 * This is just a warning so a loop-through-resultset will not abort
! 		 * if one backend terminated on it's own during the run
  		 */
! 		ereport(WARNING,
! 				(errmsg("PID %d is not a PostgreSQL server process", pid)));
! 		return SIGNAL_BACKEND_ERROR;
  	}
  
  	/*
! 	 * Can the process we just validated above end, followed by the pid being
! 	 * recycled for a new process, before reaching here?  Then we'd be trying
! 	 * to kill the wrong thing.  Seems near impossible when sequential pid
! 	 * assignment and wraparound is used.  Perhaps it could happen on a system
! 	 * where pid re-use is randomized.	That race condition possibility seems
! 	 * too unlikely to worry about.
  	 */
  
! 	/* If we have setsid(), signal the backend's whole process group */
! #ifdef HAVE_SETSID
! 	if (kill(-pid, sig))
! #else
! 	if (kill(pid, sig))
! #endif
  	{
! 		/* Again, just a warning to allow loops */
! 		ereport(WARNING,
! 				(errmsg("could not send signal to process %d: %m", pid)));
! 		return SIGNAL_BACKEND_ERROR;
  	}
- 	return SIGNAL_BACKEND_SUCCESS;
  }
  
  /*
--- 89,167 ----
  static int
  pg_signal_backend(int pid, int sig)
  {
! 	volatile PGPROC *proc;
  
! 	/* Guard against unexpected signals */
! 	switch (sig)
  	{
! 		default:
! 			/* Should never happen. */
! 			ereport(FATAL,
! 					(errcode(ERRCODE_INTERNAL_ERROR),
! 					 errmsg("unexpected signal passed to %s",
! 							PG_FUNCNAME_MACRO)));
  
  		/*
! 		 * The following are the only expected signals this function can be
! 		 * called with.
  		 */
! 		case SIGINT:
! 		case SIGTERM:
! 			/* fall-through */
! 			;
  	}
  
  	/*
! 	 * Set another backend's BEAdminAction if the current backend is
! 	 * controlledb by a superuser or a user of the same role of the other
! 	 * backend.
! 	 *
! 	 * Trust that BackendPidGetProc will return NULL if the pid isn't
! 	 * valid, even though the check for whether it's a backend process is
! 	 * below.  The IsBackendPid check can't be relied on as definitive even
! 	 * if it was first. The process might end between successive checks
! 	 * regardless of their order. There's no way to acquire a lock on an
! 	 * arbitrary process to prevent that. But since so far all the callers
! 	 * of this mechanism involve some request for ending the process
! 	 * anyway, that it might end on its own first is not a problem.
  	 */
+ 	proc = BackendPidGetProc(pid);
  
! 	if (proc == NULL)
! 		return SIGNAL_BACKEND_NOPERMISSION;
! 	else
  	{
! 		const BackendId		backendId = proc->backendId;
! 		const Oid			roleId	  = proc->roleId;
! 
! 		if (roleId != GetUserId())
! 			return SIGNAL_BACKEND_NOPERMISSION;
! 
! 		Assert(roleId == GetUserId() || superuser());
! 
! 		SpinLockAcquire(&proc->adminMutex);
! 		proc->adminBackendId = backendId;
! 
! 		switch (sig)
! 		{
! 			case SIGINT:
! 				proc->adminAction = ADMIN_ACTION_CANCEL;
! 				break;
! 			case SIGTERM:
! 				proc->adminAction = ADMIN_ACTION_TERMINATE;
! 				break;
! 			default:
! 				break;
! 		}
! 
! 		SpinLockRelease(&proc->adminMutex);
! 
! 		if (SendProcSignal(proc->pid, PROCSIG_ADMIN_ACTION_INTERRUPT,
! 						   backendId) < 0)
! 			return SIGNAL_BACKEND_ERROR;
! 		else
! 			return SIGNAL_BACKEND_SUCCESS;
  	}
  }
  
  /*
*** a/src/include/miscadmin.h
--- b/src/include/miscadmin.h
***************
*** 247,252 **** extern bool VacuumCostActive;
--- 247,253 ----
  
  /* in tcop/postgres.c */
  extern void check_stack_depth(void);
+ extern void HandleAdminActionInterrupt(void);
  
  /* in tcop/utility.c */
  extern void PreventCommandIfReadOnly(const char *cmdname);
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 19,24 ****
--- 19,25 ----
  #include "storage/latch.h"
  #include "storage/lock.h"
  #include "storage/pg_sema.h"
+ #include "storage/s_lock.h"
  
  /*
   * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
***************
*** 55,60 **** struct XidCache
--- 56,68 ----
   */
  #define		FP_LOCK_SLOTS_PER_BACKEND 16
  
+ /* The available administrative actions that can be performed on a backend */
+ typedef enum BEAdminAction {
+ 	ADMIN_ACTION_NONE = 0,
+ 	ADMIN_ACTION_CANCEL,
+ 	ADMIN_ACTION_TERMINATE
+ } BEAdminAction;
+ 
  /*
   * Each backend has a PGPROC struct in shared memory.  There is also a list of
   * currently-unused PGPROC structs that will be reallocated to new backends.
***************
*** 93,98 **** struct PGPROC
--- 101,115 ----
  	Oid			roleId;			/* OID of role using this backend */
  
  	/*
+ 	 * Backend administration fields
+ 	 *
+ 	 * adminActionMutex protects reading/writing out the admin fields
+ 	 */
+ 	slock_t			adminMutex;
+ 	BEAdminAction	adminAction;
+ 	BackendId		adminBackendId;
+ 
+ 	/*
  	 * While in hot standby mode, shows that a conflict signal has been sent
  	 * for the current transaction. Set/cleared while holding ProcArrayLock,
  	 * though not required. Accessed without lock, if needed.
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 31,36 **** typedef enum
--- 31,37 ----
  {
  	PROCSIG_CATCHUP_INTERRUPT,	/* sinval catchup interrupt */
  	PROCSIG_NOTIFY_INTERRUPT,	/* listen/notify interrupt */
+ 	PROCSIG_ADMIN_ACTION_INTERRUPT, /* trigger backend administrative action */
  
  	/* Recovery conflict reasons */
  	PROCSIG_RECOVERY_CONFLICT_DATABASE,
