From 4c8fd0ab71299e57fbeb18dec70051bd1d035c7c Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Thu, 25 Jan 2024 15:19:49 +0800
Subject: [PATCH v9 1/1] Detect misuse of spin lock automatically

Spin lock are intended for very short-term locks, but it is possible
to be misused in many cases. e.g. Acquiring another LWLocks or regular
locks, memory allocation, errstart when holding a spin lock. this patch
would detect such misuse automatically in a USE_ASSERT_CHECKING build.

CHECK_FOR_INTERRUPTS should be avoided as well when holding a spin lock.
Depends on what signals are left to handle, PG may raise error/fatal
which would cause the code jump to some other places which is hardly to
release the spin lock anyway.
---
 src/backend/storage/buffer/bufmgr.c | 24 +++++++----
 src/backend/storage/lmgr/lock.c     |  6 +++
 src/backend/storage/lmgr/lwlock.c   | 21 +++++++---
 src/backend/storage/lmgr/s_lock.c   | 63 ++++++++++++++++++++---------
 src/backend/tcop/postgres.c         |  6 +++
 src/backend/utils/error/elog.c      | 10 +++++
 src/backend/utils/mmgr/mcxt.c       | 16 ++++++++
 src/include/miscadmin.h             | 21 +++++++++-
 src/include/storage/buf_internals.h |  1 +
 src/include/storage/s_lock.h        | 56 ++++++++++++++++++-------
 src/tools/pgindent/typedefs.list    |  2 +-
 11 files changed, 176 insertions(+), 50 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7d601bef6d..739a94209b 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5402,12 +5402,11 @@ rlocator_comparator(const void *p1, const void *p2)
 uint32
 LockBufHdr(BufferDesc *desc)
 {
-	SpinDelayStatus delayStatus;
 	uint32		old_buf_state;
 
 	Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
 
-	init_local_spin_delay(&delayStatus);
+	init_local_spin_delay();
 
 	while (true)
 	{
@@ -5416,9 +5415,9 @@ LockBufHdr(BufferDesc *desc)
 		/* if it wasn't set before we're OK */
 		if (!(old_buf_state & BM_LOCKED))
 			break;
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 	}
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
 	return old_buf_state | BM_LOCKED;
 }
 
@@ -5432,20 +5431,29 @@ LockBufHdr(BufferDesc *desc)
 static uint32
 WaitBufHdrUnlocked(BufferDesc *buf)
 {
-	SpinDelayStatus delayStatus;
 	uint32		buf_state;
 
-	init_local_spin_delay(&delayStatus);
+	/*
+	 * Suppose the buf will not be locked for a long time, setup a spin on
+	 * this.
+	 */
+	init_local_spin_delay();
 
 	buf_state = pg_atomic_read_u32(&buf->state);
 
 	while (buf_state & BM_LOCKED)
 	{
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 		buf_state = pg_atomic_read_u32(&buf->state);
 	}
 
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
+
+	/*
+	 * This just a 'Wait', there is no spin lock is held now semantically
+	 * since the work which the spin is for is done already.
+	 */
+	ResetSpinLockStatus();
 
 	return buf_state;
 }
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c70a1adb9a..5383335671 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -776,6 +776,12 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	bool		found_conflict;
 	bool		log_lock = false;
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 2f2de5a562..fbc933ccbd 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -902,19 +902,24 @@ LWLockWaitListLock(LWLock *lock)
 
 		/* and then spin without atomic operations until lock is released */
 		{
-			SpinDelayStatus delayStatus;
-
-			init_local_spin_delay(&delayStatus);
+			init_local_spin_delay();
 
 			while (old_state & LW_FLAG_LOCKED)
 			{
-				perform_spin_delay(&delayStatus);
+				perform_spin_delay();
 				old_state = pg_atomic_read_u32(&lock->state);
 			}
 #ifdef LWLOCK_STATS
 			delays += delayStatus.delays;
 #endif
-			finish_spin_delay(&delayStatus);
+			finish_spin_delay();
+
+			/*
+			 * This just a 'Wait', there is no spin lock is held now
+			 * semantically since the work which the spin is for is done
+			 * already.
+			 */
+			ResetSpinLockStatus();
 		}
 
 		/*
@@ -1209,6 +1214,12 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	PRINT_LWDEBUG("LWLockAcquire", lock, mode);
 
 #ifdef LWLOCK_STATS
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 0e5f7ab0b9..fd1cd0eb70 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -65,7 +65,7 @@
 slock_t		dummy_spinlock;
 
 static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
-
+SpinLockStatus spinStatus = {0, 0, 0, NULL, -1, NULL, false};
 
 /*
  * s_lock_stuck() - complain about a stuck spinlock
@@ -92,18 +92,17 @@ s_lock_stuck(const char *file, int line, const char *func)
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 {
-	SpinDelayStatus delayStatus;
 
-	init_spin_delay(&delayStatus, file, line, func);
+	init_spin_delay(file, line, func);
 
 	while (TAS_SPIN(lock))
 	{
-		perform_spin_delay(&delayStatus);
+		perform_spin_delay();
 	}
 
-	finish_spin_delay(&delayStatus);
+	finish_spin_delay();
 
-	return delayStatus.delays;
+	return spinStatus.delays;
 }
 
 #ifdef USE_DEFAULT_S_UNLOCK
@@ -123,19 +122,19 @@ s_unlock(volatile slock_t *lock)
  * Wait while spinning on a contended spinlock.
  */
 void
-perform_spin_delay(SpinDelayStatus *status)
+perform_spin_delay()
 {
 	/* CPU-specific delay each time through the loop */
 	SPIN_DELAY();
 
 	/* Block the process every spins_per_delay tries */
-	if (++(status->spins) >= spins_per_delay)
+	if (++(spinStatus.spins) >= spins_per_delay)
 	{
-		if (++(status->delays) > NUM_DELAYS)
-			s_lock_stuck(status->file, status->line, status->func);
+		if (++(spinStatus.delays) > NUM_DELAYS)
+			s_lock_stuck(spinStatus.file, spinStatus.line, spinStatus.func);
 
-		if (status->cur_delay == 0) /* first time to delay? */
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay == 0)	/* first time to delay? */
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
 		/*
 		 * Once we start sleeping, the overhead of reporting a wait event is
@@ -146,7 +145,7 @@ perform_spin_delay(SpinDelayStatus *status)
 		 * this is better than nothing.
 		 */
 		pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
-		pg_usleep(status->cur_delay);
+		pg_usleep(spinStatus.cur_delay);
 		pgstat_report_wait_end();
 
 #if defined(S_LOCK_TEST)
@@ -155,13 +154,13 @@ perform_spin_delay(SpinDelayStatus *status)
 #endif
 
 		/* increase delay by a random fraction between 1X and 2X */
-		status->cur_delay += (int) (status->cur_delay *
-									pg_prng_double(&pg_global_prng_state) + 0.5);
+		spinStatus.cur_delay += (int) (spinStatus.cur_delay *
+									   pg_prng_double(&pg_global_prng_state) + 0.5);
 		/* wrap back to minimum delay when max is exceeded */
-		if (status->cur_delay > MAX_DELAY_USEC)
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay > MAX_DELAY_USEC)
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
-		status->spins = 0;
+		spinStatus.spins = 0;
 	}
 }
 
@@ -183,9 +182,9 @@ perform_spin_delay(SpinDelayStatus *status)
  * is handled by the two routines below.
  */
 void
-finish_spin_delay(SpinDelayStatus *status)
+finish_spin_delay()
 {
-	if (status->cur_delay == 0)
+	if (spinStatus.cur_delay == 0)
 	{
 		/* we never had to delay */
 		if (spins_per_delay < MAX_SPINS_PER_DELAY)
@@ -322,3 +321,27 @@ main()
 }
 
 #endif							/* S_LOCK_TEST */
+
+#ifdef USE_ASSERT_CHECKING
+void
+VerifyNoSpinLocksHeld(bool check_in_panic)
+{
+	if (!check_in_panic && spinStatus.in_panic)
+		return;
+
+	if (spinStatus.func != NULL)
+	{
+		spinStatus.in_panic = true;
+		elog(PANIC, "A spin lock has been held at %s:%d",
+			 spinStatus.func, spinStatus.line);
+	}
+}
+
+void
+ResetSpinLockStatus(void)
+{
+	spinStatus.func = NULL;
+	spinStatus.line = -1;
+	spinStatus.file = NULL;
+}
+#endif
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1a34bd3715..2fc8e6d3e6 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2876,6 +2876,12 @@ quickdie(SIGNAL_ARGS)
 	sigaddset(&BlockSig, SIGQUIT);	/* prevent nested calls */
 	sigprocmask(SIG_SETMASK, &BlockSig, NULL);
 
+	/*
+	 * tell the spin lock system that we are dying. Since this backend will
+	 * exit very soon, so there is no need to reset the variable back.
+	 */
+	spinStatus.in_panic = true;
+
 	/*
 	 * Prevent interrupts while exiting; though we just blocked signals that
 	 * would queue new interrupts, one may have been pending.  We don't want a
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 2c7a20e3d3..ee75a78d54 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -79,6 +79,7 @@
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
+#include "storage/s_lock.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc_hooks.h"
 #include "utils/memutils.h"
@@ -351,6 +352,15 @@ errstart(int elevel, const char *domain)
 	bool		output_to_client = false;
 	int			i;
 
+	/*
+	 * Logging likely happens in many places without a outstanding attention,
+	 * and it's far more than a few dozen instructions, so it should be only
+	 * called when there is no spin lock is held. However it is allowed in the
+	 * quickdie process where it is possible that some spin lock being held,
+	 * we allow the errstart here since quickdie needs some logging.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	/*
 	 * Check some cases in which we want to promote an error into a more
 	 * severe error.  None of this logic applies for non-error messages.
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 1336944084..c27adbfc94 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -1028,6 +1028,13 @@ MemoryContextAlloc(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/*
+	 * Memory allocation likely happens in many places without a outstanding
+	 * attention, and it's far more than a few dozen instructions, so it
+	 * should be only called when there is no spin lock is held.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1071,6 +1078,9 @@ MemoryContextAllocZero(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1197,6 +1207,9 @@ palloc(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1228,6 +1241,9 @@ palloc0(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0b01c1f093..9ac725cd57 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -107,6 +107,13 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount;
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);
 
+/* in storage/s_lock.h */
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#endif
+
 /* Test whether an interrupt is pending */
 #ifndef WIN32
 #define INTERRUPTS_PENDING_CONDITION() \
@@ -117,9 +124,21 @@ extern void ProcessInterrupts(void);
 	 unlikely(InterruptPending))
 #endif
 
-/* Service interrupt, if one is pending and it's safe to service it now */
+/*
+ * Service interrupt, if one is pending and it's safe to service it now
+ *
+ * Spin lock doesn't have a overall infrastructure to release all the locks
+ * on ERROR. and ProcessInterrupts likely raises some ERROR or FATAL which
+ * makes the code jump to some other places, then spin lock is leaked.
+ * Let's make sure no spin lock can be held at this point.
+ *
+ * However it is possible that when a spin lock is being held, but it get a
+ * signal. The known signal handler quickdie needs using elog system so we
+ * need to telling this to spin lock detection system to bypass some check.
+ */
 #define CHECK_FOR_INTERRUPTS() \
 do { \
+	VerifyNoSpinLocksHeld(false); \
 	if (INTERRUPTS_PENDING_CONDITION()) \
 		ProcessInterrupts(); \
 } while(0)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index f190e6e5e4..69e17a9d61 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -363,6 +363,7 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
 {
 	pg_write_barrier();
 	pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED));
+	ResetSpinLockStatus();
 }
 
 /* in bufmgr.c */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index aa06e49da2..c3fe75a41d 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -652,7 +652,10 @@ tas(volatile slock_t *lock)
  */
 #if !defined(S_UNLOCK)
 #define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
+	do { __asm__ __volatile__("" : : : "memory"); \
+		ResetSpinLockStatus(); \
+		*(lock) = 0; \
+} while (0)
 #endif
 
 #endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
@@ -835,8 +838,13 @@ extern void set_spins_per_delay(int shared_spins_per_delay);
 extern int	update_spins_per_delay(int shared_spins_per_delay);
 
 /*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
+ * Support for spin delay and spin misuse detection purpose.
+ *
+ * spin delay which is useful in various places where spinlock-like
+ * procedures take place.
+ *
+ * spin misuse is based on global spinStatus to know if a spin lock
+ * is held when a heavy operation is taking.
  */
 typedef struct
 {
@@ -846,22 +854,40 @@ typedef struct
 	const char *file;
 	int			line;
 	const char *func;
-} SpinDelayStatus;
+	bool		in_panic; /* works for spin lock misuse purpose. */
+} SpinLockStatus;
 
+extern PGDLLIMPORT SpinLockStatus spinStatus;
+
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+extern void ResetSpinLockStatus(void);
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#define ResetSpinLockStatus() ((void) true)
+#endif
+
+/*
+ * start the spin delay logic and record the places where the spin lock
+ * is held which is also helpful for spin lock misuse detection purpose.
+ * init_spin_delay should be called with ResetSpinLockStatus in pair.
+ */
 static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
+init_spin_delay(const char *file, int line, const char *func)
 {
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
+	/* it is not allowed to spin another lock when holding one already. */
+	VerifyNoSpinLocksHeld(true);
+	spinStatus.spins = 0;
+	spinStatus.delays = 0;
+	spinStatus.cur_delay = 0;
+	spinStatus.file = file;
+	spinStatus.line = line;
+	spinStatus.func = func;
+	spinStatus.in_panic = false;
 }
 
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
+#define init_local_spin_delay() init_spin_delay( __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(void);
+extern void finish_spin_delay(void);
 
 #endif	 /* S_LOCK_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a200e5eb12..10bc6094ff 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2635,7 +2635,7 @@ SpGistSearchItem
 SpGistState
 SpGistTypeDesc
 SpecialJoinInfo
-SpinDelayStatus
+SpinLockStatus
 SplitInterval
 SplitLR
 SplitPageLayout
-- 
2.34.1

