From 574d02d3d4d7f38b764ca6a2e4d2617a417ab8a8 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 29 Jul 2024 10:55:11 -0700
Subject: [PATCH v2 2/2] Detect many uses of uninitialized spinlocks on x86-64

On most platforms we use 0 for a unlocked spinlock and 1 for a locked one. As
we often place spinlocks in zero-initialized memory, missing calls to
SpinLockInit() aren't noticed.

To address that, swap the meaning of the spinlock state and use 1 for unlocked
and 0 for locked. That way using an uninitialized spinlock blocks the first
time a lock is acquired. A dedicated state for initialized locks would allow
for a nicer assertion, but ends up with slightly less dense code (a three byte
cmp with an immediate, instead of a 2 byte test).

To make the swapped meaning of the slock_t state easier to understand when
debugging, change the slock_t to be a struct on x86-64, with an "is_free"
member.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/20240729164026.yurg37tej34o76uk@awork3.anarazel.de
Backpatch:
---
 src/include/storage/s_lock.h | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 02c68513a53..18291b284b0 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -205,7 +205,10 @@ spin_delay(void)
 #ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
 #define HAS_TEST_AND_SET
 
-typedef unsigned char slock_t;
+typedef struct slock_t
+{
+	char	is_free;
+} slock_t;
 
 #define TAS(lock) tas(lock)
 
@@ -217,21 +220,27 @@ typedef unsigned char slock_t;
  * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
  * available at:
  * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
+ *
+ * To catch uses of uninitialized spinlocks, we define 1 as unlocked and 0 as
+ * locked. That causes the first acquisition of an uninitialized spinlock to
+ * block forever. A dedicated state for initialized locks would allow for a
+ * nicer assertion, but ends up with slightly less dense code (a three byte
+ * cmp with an immediate, instead of a 2 byte test).
  */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
+#define TAS_SPIN(lock)    ((lock)->is_free == 0 ? 1 : TAS(lock))
 
 static __inline__ int
 tas(volatile slock_t *lock)
 {
-	slock_t		_res = 1;
+	char		was_free = 0;
 
 	__asm__ __volatile__(
 		"	lock			\n"
 		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
+:		"+q"(was_free), "+m"(lock->is_free)
 :		/* no inputs */
 :		"memory", "cc");
-	return (int) _res;
+	return was_free == 0;
 }
 
 #define SPIN_DELAY() spin_delay()
@@ -247,6 +256,13 @@ spin_delay(void)
 		" rep; nop			\n");
 }
 
+#define S_UNLOCK(lock)	\
+	do { __asm__ __volatile__("" : : : "memory");  (lock)->is_free = 1; } while (0)
+
+#define S_LOCK_FREE(lock)	((lock)->is_free == 1)
+
+#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
+
 #endif	 /* __x86_64__ */
 
 
-- 
2.45.2.746.g06e570c0df.dirty