From 961ba292a9a87685c5ddeefbbaa5edc02a17e0e8 Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Thu, 8 Feb 2024 21:52:24 +0800
Subject: [PATCH v10 1/1] Detect misuse of spin lock automatically

Spin lock are intended for very short-term locks, but it is possible
to be misused in many cases. e.g. Acquiring another LWLocks or regular
locks, memory allocation, errstart when holding a spin lock. this patch
would detect such misuse automatically in a USE_ASSERT_CHECKING build.

CHECK_FOR_INTERRUPTS should be avoided as well when holding a spin lock.
Depends on what signals are left to handle, PG may raise error/fatal
which would cause the code jump to some other places which is hardly to
release the spin lock anyway.
---
 src/backend/storage/buffer/bufmgr.c | 18 +++++----
 src/backend/storage/lmgr/lock.c     |  6 +++
 src/backend/storage/lmgr/lwlock.c   | 16 +++++---
 src/backend/storage/lmgr/s_lock.c   | 61 +++++++++++++++++++----------
 src/backend/tcop/postgres.c         |  8 ++++
 src/backend/utils/error/elog.c      |  9 +++++
 src/backend/utils/mmgr/mcxt.c       | 16 ++++++++
 src/include/miscadmin.h             | 21 +++++++++-
 src/include/storage/buf_internals.h |  1 +
 src/include/storage/s_lock.h        | 61 ++++++++++++++++++++++-------
 src/include/storage/spin.h          | 15 ++++++-
 src/test/regress/regress.c          | 12 ++++--
 src/tools/pgindent/typedefs.list    |  2 +-
 13 files changed, 192 insertions(+), 54 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index eb1ec3b86d..f97612ca92 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5390,12 +5390,13 @@ rlocator_comparator(const void *p1, const void *p2)
 uint32
 LockBufHdr(BufferDesc *desc)
 {
-	SpinDelayStatus delayStatus;
 	uint32		old_buf_state;
 
 	Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
 
-	init_local_spin_delay(&delayStatus);
+	VerifyNoSpinLocksHeld(true);
+
+	spinlock_local_prepare_spin();
 
 	while (true)
 	{
@@ -5404,9 +5405,9 @@ LockBufHdr(BufferDesc *desc)
 		/* if it wasn't set before we're OK */
 		if (!(old_buf_state & BM_LOCKED))
 			break;
-		perform_spin_delay(&delayStatus);
+		spinlock_perform_delay();
 	}
-	finish_spin_delay(&delayStatus);
+	spinlock_finish_spin();
 	return old_buf_state | BM_LOCKED;
 }
 
@@ -5420,20 +5421,21 @@ LockBufHdr(BufferDesc *desc)
 static uint32
 WaitBufHdrUnlocked(BufferDesc *buf)
 {
-	SpinDelayStatus delayStatus;
 	uint32		buf_state;
 
-	init_local_spin_delay(&delayStatus);
+	spinlock_local_prepare_spin();
 
 	buf_state = pg_atomic_read_u32(&buf->state);
 
 	while (buf_state & BM_LOCKED)
 	{
-		perform_spin_delay(&delayStatus);
+		spinlock_perform_delay();
 		buf_state = pg_atomic_read_u32(&buf->state);
 	}
 
-	finish_spin_delay(&delayStatus);
+	spinlock_finish_spin();
+
+	spinlock_finish_release();
 
 	return buf_state;
 }
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c70a1adb9a..a711e63664 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -776,6 +776,12 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	bool		found_conflict;
 	bool		log_lock = false;
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let's make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 71677cf031..7705bb0a18 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -871,19 +871,19 @@ LWLockWaitListLock(LWLock *lock)
 
 		/* and then spin without atomic operations until lock is released */
 		{
-			SpinDelayStatus delayStatus;
-
-			init_local_spin_delay(&delayStatus);
+			spinlock_local_prepare_spin();
 
 			while (old_state & LW_FLAG_LOCKED)
 			{
-				perform_spin_delay(&delayStatus);
+				spinlock_perform_delay();
 				old_state = pg_atomic_read_u32(&lock->state);
 			}
 #ifdef LWLOCK_STATS
 			delays += delayStatus.delays;
 #endif
-			finish_spin_delay(&delayStatus);
+			spinlock_finish_spin();
+
+			spinlock_finish_release();
 		}
 
 		/*
@@ -1178,6 +1178,12 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
 
+	/*
+	 * Spin lock should not be held for a long time, but the time needed here
+	 * may be too long, so let make sure no spin lock is held now.
+	 */
+	VerifyNoSpinLocksHeld(true);
+
 	PRINT_LWDEBUG("LWLockAcquire", lock, mode);
 
 #ifdef LWLOCK_STATS
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 9b389d9951..1a23a76f0d 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -71,7 +71,7 @@ uint32	   *my_wait_event_info = &local_my_wait_event_info;
 #endif
 
 static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
-
+SpinLockStatus spinStatus = {0, 0, 0, NULL, -1, NULL, false};
 
 /*
  * s_lock_stuck() - complain about a stuck spinlock
@@ -98,18 +98,17 @@ s_lock_stuck(const char *file, int line, const char *func)
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 {
-	SpinDelayStatus delayStatus;
 
-	init_spin_delay(&delayStatus, file, line, func);
+	spinlock_prepare_spin(file, line, func);
 
 	while (TAS_SPIN(lock))
 	{
-		perform_spin_delay(&delayStatus);
+		spinlock_perform_delay();
 	}
 
-	finish_spin_delay(&delayStatus);
+	spinlock_finish_spin();
 
-	return delayStatus.delays;
+	return spinStatus.delays;
 }
 
 #ifdef USE_DEFAULT_S_UNLOCK
@@ -129,19 +128,19 @@ s_unlock(volatile slock_t *lock)
  * Wait while spinning on a contended spinlock.
  */
 void
-perform_spin_delay(SpinDelayStatus *status)
+spinlock_perform_delay()
 {
 	/* CPU-specific delay each time through the loop */
 	SPIN_DELAY();
 
 	/* Block the process every spins_per_delay tries */
-	if (++(status->spins) >= spins_per_delay)
+	if (++(spinStatus.spins) >= spins_per_delay)
 	{
-		if (++(status->delays) > NUM_DELAYS)
-			s_lock_stuck(status->file, status->line, status->func);
+		if (++(spinStatus.delays) > NUM_DELAYS)
+			s_lock_stuck(spinStatus.file, spinStatus.line, spinStatus.func);
 
-		if (status->cur_delay == 0) /* first time to delay? */
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay == 0)	/* first time to delay? */
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
 		/*
 		 * Once we start sleeping, the overhead of reporting a wait event is
@@ -152,7 +151,7 @@ perform_spin_delay(SpinDelayStatus *status)
 		 * this is better than nothing.
 		 */
 		pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
-		pg_usleep(status->cur_delay);
+		pg_usleep(spinStatus.cur_delay);
 		pgstat_report_wait_end();
 
 #if defined(S_LOCK_TEST)
@@ -161,13 +160,13 @@ perform_spin_delay(SpinDelayStatus *status)
 #endif
 
 		/* increase delay by a random fraction between 1X and 2X */
-		status->cur_delay += (int) (status->cur_delay *
-									pg_prng_double(&pg_global_prng_state) + 0.5);
+		spinStatus.cur_delay += (int) (spinStatus.cur_delay *
+									   pg_prng_double(&pg_global_prng_state) + 0.5);
 		/* wrap back to minimum delay when max is exceeded */
-		if (status->cur_delay > MAX_DELAY_USEC)
-			status->cur_delay = MIN_DELAY_USEC;
+		if (spinStatus.cur_delay > MAX_DELAY_USEC)
+			spinStatus.cur_delay = MIN_DELAY_USEC;
 
-		status->spins = 0;
+		spinStatus.spins = 0;
 	}
 }
 
@@ -189,9 +188,9 @@ perform_spin_delay(SpinDelayStatus *status)
  * is handled by the two routines below.
  */
 void
-finish_spin_delay(SpinDelayStatus *status)
+spinlock_finish_spin()
 {
-	if (status->cur_delay == 0)
+	if (spinStatus.cur_delay == 0)
 	{
 		/* we never had to delay */
 		if (spins_per_delay < MAX_SPINS_PER_DELAY)
@@ -328,3 +327,25 @@ main()
 }
 
 #endif							/* S_LOCK_TEST */
+
+#ifdef USE_ASSERT_CHECKING
+void
+VerifyNoSpinLocksHeld(bool check_in_panic)
+{
+	if (!check_in_panic && spinStatus.in_panic)
+		return;
+
+	if (spinStatus.func != NULL)
+	{
+		/*
+		 * Now we have held a spin lock and then errstart is disallow, to
+		 * avoid the endless recursive call of VerifyNoSpinLocksHeld because
+		 * of the VerifyNoSpinLocksHeld checks in errstart, set
+		 * spinStatus.in_panic to true to break the cycle.
+		 */
+		spinStatus.in_panic = true;
+		elog(PANIC, "A spin lock has been held at %s:%d",
+			 spinStatus.func, spinStatus.line);
+	}
+}
+#endif
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1a34bd3715..02fa44c1d2 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2876,6 +2876,14 @@ quickdie(SIGNAL_ARGS)
 	sigaddset(&BlockSig, SIGQUIT);	/* prevent nested calls */
 	sigprocmask(SIG_SETMASK, &BlockSig, NULL);
 
+	/*
+	 * It is possible that getting here when holding a spin lock already.
+	 * However current function needs some actions like elog which are
+	 * disallowed when holding a spin lock by spinlock misuse detection
+	 * system. So tell that system to treat this specially.
+	 */
+	spinStatus.in_panic = true;
+
 	/*
 	 * Prevent interrupts while exiting; though we just blocked signals that
 	 * would queue new interrupts, one may have been pending.  We don't want a
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 700fbde6db..01f8397c84 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -79,6 +79,7 @@
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
+#include "storage/s_lock.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc_hooks.h"
 #include "utils/memutils.h"
@@ -351,6 +352,14 @@ errstart(int elevel, const char *domain)
 	bool		output_to_client = false;
 	int			i;
 
+	/*
+	 * Logging likely happens in many places without a outstanding attention,
+	 * and it's far more than a few dozen instructions, so it should be only
+	 * called when there is no spin lock is held. However it is allowed in the
+	 * quickdie process where it is possible that some spin lock being held.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	/*
 	 * Check some cases in which we want to promote an error into a more
 	 * severe error.  None of this logic applies for non-error messages.
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index ad7409a02c..eb004acd7a 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -1041,6 +1041,13 @@ MemoryContextAlloc(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/*
+	 * Memory allocation likely happens in many places without a outstanding
+	 * attention, and it's far more than a few dozen instructions, so it
+	 * should be only called when there is no spin lock is held.
+	 */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1084,6 +1091,9 @@ MemoryContextAllocZero(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1210,6 +1220,9 @@ palloc(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1241,6 +1254,9 @@ palloc0(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	/* see comments in MemoryContextAlloc. */
+	VerifyNoSpinLocksHeld(false);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0b01c1f093..9ac725cd57 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -107,6 +107,13 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount;
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);
 
+/* in storage/s_lock.h */
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#endif
+
 /* Test whether an interrupt is pending */
 #ifndef WIN32
 #define INTERRUPTS_PENDING_CONDITION() \
@@ -117,9 +124,21 @@ extern void ProcessInterrupts(void);
 	 unlikely(InterruptPending))
 #endif
 
-/* Service interrupt, if one is pending and it's safe to service it now */
+/*
+ * Service interrupt, if one is pending and it's safe to service it now
+ *
+ * Spin lock doesn't have a overall infrastructure to release all the locks
+ * on ERROR. and ProcessInterrupts likely raises some ERROR or FATAL which
+ * makes the code jump to some other places, then spin lock is leaked.
+ * Let's make sure no spin lock can be held at this point.
+ *
+ * However it is possible that when a spin lock is being held, but it get a
+ * signal. The known signal handler quickdie needs using elog system so we
+ * need to telling this to spin lock detection system to bypass some check.
+ */
 #define CHECK_FOR_INTERRUPTS() \
 do { \
+	VerifyNoSpinLocksHeld(false); \
 	if (INTERRUPTS_PENDING_CONDITION()) \
 		ProcessInterrupts(); \
 } while(0)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index f190e6e5e4..9f1cf6dbe8 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -363,6 +363,7 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
 {
 	pg_write_barrier();
 	pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED));
+	spinlock_finish_release();
 }
 
 /* in bufmgr.c */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 69582f4ae7..21878342fb 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -834,8 +834,13 @@ extern void set_spins_per_delay(int shared_spins_per_delay);
 extern int	update_spins_per_delay(int shared_spins_per_delay);
 
 /*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
+ * Support for spin delay and spin misuse detection purpose.
+ *
+ * spin delay which is useful in various places where spinlock-like
+ * procedures take place.
+ *
+ * spin misuse is based on global spinStatus to know if a spin lock
+ * is held when a heavy operation is taking.
  */
 typedef struct
 {
@@ -845,22 +850,50 @@ typedef struct
 	const char *file;
 	int			line;
 	const char *func;
-} SpinDelayStatus;
+	bool		in_panic; /* works for spin lock misuse purpose. */
+} SpinLockStatus;
+
+extern PGDLLIMPORT SpinLockStatus spinStatus;
+
+#ifdef USE_ASSERT_CHECKING
+extern void VerifyNoSpinLocksHeld(bool check_in_panic);
+static inline void
+spinlock_prepare_acquire(const char *file, int line, const char *func)
+{
+	/* it is not allowed to spin another lock when holding one already. */
+	VerifyNoSpinLocksHeld(true);
+	spinStatus.file = file;
+	spinStatus.line = line;
+	spinStatus.func = func;
+}
+static inline void
+spinlock_finish_release(void)
+{
+	spinStatus.file = NULL;
+	spinStatus.line = -1;
+	spinStatus.func = NULL;
+}
+
+#else
+#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true)
+#define spinlock_prepare_acquire(file, line, func) ((void) true)
+#define spinlock_finish_release() ((void) true)
+#endif
 
 static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
+spinlock_prepare_spin(const char *file, int line, const char *func)
 {
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
+	spinStatus.spins = 0;
+	spinStatus.delays = 0;
+	spinStatus.cur_delay = 0;
+	spinStatus.file = file;
+	spinStatus.line = line;
+	spinStatus.func = func;
+	spinStatus.in_panic = false;
 }
 
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
+#define spinlock_local_prepare_spin() spinlock_prepare_spin( __FILE__, __LINE__, __func__)
+extern void spinlock_perform_delay(void);
+extern void spinlock_finish_spin(void);
 
 #endif	 /* S_LOCK_H */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index c0679c5999..b3636fcaaa 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -59,9 +59,20 @@
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+#define SpinLockAcquire(lock) \
+	do \
+	{ \
+		spinlock_prepare_acquire(__FILE__, __LINE__, __func__); \
+		S_LOCK(lock); \
+	} while (false)
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
+
+#define SpinLockRelease(lock) \
+	do \
+	{ \
+		S_UNLOCK(lock) ; \
+		spinlock_finish_release(); \
+	} while (false)
 
 #define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index a17afc98be..d41deac0eb 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -854,11 +854,17 @@ test_spinlock(void)
 		/* test basic operations via underlying S_* API */
 		S_INIT_LOCK(&struct_w_lock.lock);
 		S_LOCK(&struct_w_lock.lock);
-		S_UNLOCK(&struct_w_lock.lock);
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/* and that "contended" acquisition works */
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
-		S_UNLOCK(&struct_w_lock.lock);
+
+		/*
+		 * XXX: can't use S_UNLOCK directly since it will leak spinStatus.file
+		 * reset. This should be OK since "none of the macros in this file are
+		 * intended to be called directly." in s_lock.h
+		 */
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/*
 		 * Check, using TAS directly, that a single spin cycle doesn't block
@@ -875,7 +881,7 @@ test_spinlock(void)
 			elog(ERROR, "acquired already held spinlock");
 #endif							/* defined(TAS_SPIN) */
 
-		S_UNLOCK(&struct_w_lock.lock);
+		SpinLockRelease(&struct_w_lock.lock);
 #endif							/* defined(TAS) */
 
 		/*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 91433d439b..1d35e125d8 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2637,7 +2637,7 @@ SpGistSearchItem
 SpGistState
 SpGistTypeDesc
 SpecialJoinInfo
-SpinDelayStatus
+SpinLockStatus
 SplitInterval
 SplitLR
 SplitPageLayout
-- 
2.34.1

