Remove last traces of HPPA support

Started by Tom Lane · about 2 years ago · 36 messages
#1Tom Lane
tgl@sss.pgh.pa.us
1 attachment(s)

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Hence, the attached removes the remaining support for HPPA.
Any objections?

regards, tom lane

Attachments:

v1-remove-hppa-architecture-support.patch (text/x-diff; charset=us-ascii; name=v1-remove-hppa-architecture-support.patch) — Download
diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c
index 1264eccb3f..5a1b1e1009 100644
--- a/contrib/pgcrypto/crypt-blowfish.c
+++ b/contrib/pgcrypto/crypt-blowfish.c
@@ -41,7 +41,7 @@
 #ifdef __i386__
 #define BF_ASM				0	/* 1 */
 #define BF_SCALE			1
-#elif defined(__x86_64__) || defined(__hppa__)
+#elif defined(__x86_64__)
 #define BF_ASM				0
 #define BF_SCALE			1
 #else
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index f4b1f81189..3608aec595 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -3359,8 +3359,8 @@ export MANPATH
 
   <para>
    In general, <productname>PostgreSQL</productname> can be expected to work on
-   these CPU architectures: x86, PowerPC, S/390, SPARC, ARM, MIPS, RISC-V,
-   and PA-RISC, including
+   these CPU architectures: x86, PowerPC, S/390, SPARC, ARM, MIPS,
+   and RISC-V, including
    big-endian, little-endian, 32-bit, and 64-bit variants where applicable.
    It is often
    possible to build on an unsupported CPU type by configuring with
@@ -3391,7 +3391,8 @@ export MANPATH
   <para>
    Historical versions of <productname>PostgreSQL</productname> or POSTGRES
    also ran on CPU architectures including Alpha, Itanium, M32R, M68K,
-   M88K, NS32K, SuperH, and VAX, and operating systems including 4.3BSD, BEOS,
+   M88K, NS32K, PA-RISC, SuperH, and VAX,
+   and operating systems including 4.3BSD, BEOS,
    BSD/OS, DG/UX, Dynix, HP-UX, IRIX, NeXTSTEP, QNX, SCO, SINIX, Sprite, SunOS,
    Tru64 UNIX, and ULTRIX.
   </para>
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 327ac64f7c..0e3f04207c 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -110,12 +110,7 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 void
 s_unlock(volatile slock_t *lock)
 {
-#ifdef TAS_ACTIVE_WORD
-	/* HP's PA-RISC */
-	*TAS_ACTIVE_WORD(lock) = -1;
-#else
 	*lock = 0;
-#endif
 }
 #endif
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index bbff945eba..f6f62d68c0 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -69,8 +69,6 @@
 #include "port/atomics/arch-x86.h"
 #elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
 #include "port/atomics/arch-ppc.h"
-#elif defined(__hppa) || defined(__hppa__)
-#include "port/atomics/arch-hppa.h"
 #endif
 
 /*
diff --git a/src/include/port/atomics/arch-hppa.h b/src/include/port/atomics/arch-hppa.h
deleted file mode 100644
index 4c89fbff71..0000000000
--- a/src/include/port/atomics/arch-hppa.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * arch-hppa.h
- *	  Atomic operations considerations specific to HPPA
- *
- * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * NOTES:
- *
- * src/include/port/atomics/arch-hppa.h
- *
- *-------------------------------------------------------------------------
- */
-
-/* HPPA doesn't do either read or write reordering */
-#define pg_memory_barrier_impl()		pg_compiler_barrier_impl()
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index a9e8e77c03..d119e8cc50 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -75,11 +75,7 @@ typedef struct pg_atomic_flag
 	 * be content with just one byte instead of 4, but that's not too much
 	 * waste.
 	 */
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
-	int			sema[4];
-#else
 	int			sema;
-#endif
 	volatile bool value;
 } pg_atomic_flag;
 
@@ -93,11 +89,7 @@ typedef struct pg_atomic_flag
 typedef struct pg_atomic_uint32
 {
 	/* Check pg_atomic_flag's definition above for an explanation */
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC */
-	int			sema[4];
-#else
 	int			sema;
-#endif
 	volatile uint32 value;
 } pg_atomic_uint32;
 
@@ -111,11 +103,7 @@ typedef struct pg_atomic_uint32
 typedef struct pg_atomic_uint64
 {
 	/* Check pg_atomic_flag's definition above for an explanation */
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC */
-	int			sema[4];
-#else
 	int			sema;
-#endif
 	volatile uint64 value;
 } pg_atomic_uint64;
 
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index c9fa84cc43..e76e8b6888 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -530,71 +530,6 @@ do \
 #endif /* __mips__ && !__sgi */
 
 
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC */
-/*
- * HP's PA-RISC
- *
- * Because LDCWX requires a 16-byte-aligned address, we declare slock_t as a
- * 16-byte struct.  The active word in the struct is whichever has the aligned
- * address; the other three words just sit at -1.
- */
-#define HAS_TEST_AND_SET
-
-typedef struct
-{
-	int			sema[4];
-} slock_t;
-
-#define TAS_ACTIVE_WORD(lock)	((volatile int *) (((uintptr_t) (lock) + 15) & ~15))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile int *lockword = TAS_ACTIVE_WORD(lock);
-	int			lockval;
-
-	/*
-	 * The LDCWX instruction atomically clears the target word and
-	 * returns the previous value.  Hence, if the instruction returns
-	 * 0, someone else has already acquired the lock before we tested
-	 * it (i.e., we have failed).
-	 *
-	 * Notice that this means that we actually clear the word to set
-	 * the lock and set the word to clear the lock.  This is the
-	 * opposite behavior from the SPARC LDSTUB instruction.  For some
-	 * reason everything that H-P does is rather baroque...
-	 *
-	 * For details about the LDCWX instruction, see the "Precision
-	 * Architecture and Instruction Reference Manual" (09740-90014 of June
-	 * 1987), p. 5-38.
-	 */
-	__asm__ __volatile__(
-		"	ldcwx	0(0,%2),%0	\n"
-:		"=r"(lockval), "+m"(*lockword)
-:		"r"(lockword)
-:		"memory");
-	return (lockval == 0);
-}
-
-#define S_UNLOCK(lock)	\
-	do { \
-		__asm__ __volatile__("" : : : "memory"); \
-		*TAS_ACTIVE_WORD(lock) = -1; \
-	} while (0)
-
-#define S_INIT_LOCK(lock) \
-	do { \
-		volatile slock_t *lock_ = (lock); \
-		lock_->sema[0] = -1; \
-		lock_->sema[1] = -1; \
-		lock_->sema[2] = -1; \
-		lock_->sema[3] = -1; \
-	} while (0)
-
-#define S_LOCK_FREE(lock)	(*TAS_ACTIVE_WORD(lock) != 0)
-
-#endif	 /* __hppa || __hppa__ */
-
 
 /*
  * If we have no platform-specific knowledge, but we found that the compiler
diff --git a/src/tools/pginclude/cpluspluscheck b/src/tools/pginclude/cpluspluscheck
index 4e09c4686b..96852ef75f 100755
--- a/src/tools/pginclude/cpluspluscheck
+++ b/src/tools/pginclude/cpluspluscheck
@@ -84,12 +84,10 @@ do
 	# Likewise, these files are platform-specific, and the one
 	# relevant to our platform will be included by atomics.h.
 	test "$f" = src/include/port/atomics/arch-arm.h && continue
-	test "$f" = src/include/port/atomics/arch-hppa.h && continue
 	test "$f" = src/include/port/atomics/arch-ppc.h && continue
 	test "$f" = src/include/port/atomics/arch-x86.h && continue
 	test "$f" = src/include/port/atomics/fallback.h && continue
 	test "$f" = src/include/port/atomics/generic.h && continue
-	test "$f" = src/include/port/atomics/generic-acc.h && continue
 	test "$f" = src/include/port/atomics/generic-gcc.h && continue
 	test "$f" = src/include/port/atomics/generic-msvc.h && continue
 	test "$f" = src/include/port/atomics/generic-sunpro.h && continue
diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck
index 8dee1b5670..0b9b9740f4 100755
--- a/src/tools/pginclude/headerscheck
+++ b/src/tools/pginclude/headerscheck
@@ -79,12 +79,10 @@ do
 	# Likewise, these files are platform-specific, and the one
 	# relevant to our platform will be included by atomics.h.
 	test "$f" = src/include/port/atomics/arch-arm.h && continue
-	test "$f" = src/include/port/atomics/arch-hppa.h && continue
 	test "$f" = src/include/port/atomics/arch-ppc.h && continue
 	test "$f" = src/include/port/atomics/arch-x86.h && continue
 	test "$f" = src/include/port/atomics/fallback.h && continue
 	test "$f" = src/include/port/atomics/generic.h && continue
-	test "$f" = src/include/port/atomics/generic-acc.h && continue
 	test "$f" = src/include/port/atomics/generic-gcc.h && continue
 	test "$f" = src/include/port/atomics/generic-msvc.h && continue
 	test "$f" = src/include/port/atomics/generic-sunpro.h && continue
#2Michael Paquier
michael@paquier.xyz
In reply to: Tom Lane (#1)
Re: Remove last traces of HPPA support

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Looks OK for the C parts.

Hence, the attached removes the remaining support for HPPA.
Any objections?

Would a refresh of config/config.guess and config/config.sub be
suited? This stuff still has references to HPPA.
--
Michael

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Michael Paquier (#2)
Re: Remove last traces of HPPA support

Michael Paquier <michael@paquier.xyz> writes:

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

Hence, the attached removes the remaining support for HPPA.
Any objections?

Would a refresh of config/config.guess and config/config.sub be
suited? This stuff still has references to HPPA.

AFAIK we just absorb those files verbatim from upstream. There is plenty
of stuff in them for systems we don't support; it's not worth trying
to clean that out.

regards, tom lane

#4Noah Misch
noah@leadboat.com
In reply to: Tom Lane (#1)
Re: Remove last traces of HPPA support

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Hence, the attached removes the remaining support for HPPA.
Any objections?

I wouldn't do this. NetBSD/hppa still claims to exist, as does the OpenBSD
equivalent. I presume its pkgsrc compiles this code. The code is basically
zero-maintenance, so there's not much to gain from deleting it preemptively.

#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: Noah Misch (#4)
Re: Remove last traces of HPPA support

Noah Misch <noah@leadboat.com> writes:

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

Hence, the attached removes the remaining support for HPPA.

I wouldn't do this. NetBSD/hppa still claims to exist, as does the OpenBSD
equivalent. I presume its pkgsrc compiles this code. The code is basically
zero-maintenance, so there's not much to gain from deleting it preemptively.

I doubt it: I don't think anyone is routinely building very much of
pkgsrc for backwater hardware like HPPA, on either distro. It takes
too much time (as cross-build doesn't work IME) and there are too few
potential users. I certainly had to build all my own packages during
my experiments with running those systems on my machine.

Moreover, if they are compiling it they aren't testing it.
I filed a pile of bugs against NetBSD kernel and toolchains
on the way to getting the late lamented chickadee animal running.
While it was pretty much working when I retired chickadee, it was
obviously ground that nobody else had trodden in a long time.

As for OpenBSD, while I did have a working installation of 6.4
at one time, I completely failed to get 7.1 running on that
hardware. I think it's maintained only for very small values
of "maintained".

Lastly, even when they're working those systems are about half
the speed of HP-UX on the same hardware; and even when using HP-UX
there is no HPPA hardware that's not insanely slow by modern
standards. I can't believe that anyone would want to run modern
PG on that stack, and I don't believe that anyone but me has
tried in a long time.

regards, tom lane

#6Thomas Munro
thomas.munro@gmail.com
In reply to: Tom Lane (#1)
Re: Remove last traces of HPPA support

On Fri, Oct 20, 2023 at 4:21 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Hence, the attached removes the remaining support for HPPA.

+1

#7Tom Lane
tgl@sss.pgh.pa.us
In reply to: Tom Lane (#5)
Re: Remove last traces of HPPA support

I wrote:

Noah Misch <noah@leadboat.com> writes:

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

Hence, the attached removes the remaining support for HPPA.

I wouldn't do this. NetBSD/hppa still claims to exist, as does the OpenBSD
equivalent. I presume its pkgsrc compiles this code. The code is basically
zero-maintenance, so there's not much to gain from deleting it preemptively.

I doubt it: I don't think anyone is routinely building very much of
pkgsrc for backwater hardware like HPPA, on either distro.

I dug a bit further on this point. The previous discussion about
our policy for old-hardware support was here:

/messages/by-id/959917.1657522169@sss.pgh.pa.us

The existence of a NetBSD/sh3el package for Postgres didn't stop
us from dropping SuperH support. Moreover, the page showing the
existence of that package:

https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/databases/postgresql14-server/index.html

also shows a build for VAX, which we know positively would not
have passed regression tests, so they certainly weren't testing
those builds. (And, to the point here, it does *not* show any
build for hppa.)

The bottom line, though, is that IMV we agreed in that thread to a
policy that no architecture will be considered supported unless
it has a representative in the buildfarm. We've since enforced
that policy in the case of loongarch64, so it seems established.
With my HPPA animal gone, and nobody very likely to step up with
a replacement, HPPA no longer meets that threshold requirement.

regards, tom lane

#8Andres Freund
andres@anarazel.de
In reply to: Noah Misch (#4)
Re: Remove last traces of HPPA support

Hi,

On 2023-10-19 17:23:04 -0700, Noah Misch wrote:

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Hence, the attached removes the remaining support for HPPA.
Any objections?

I wouldn't do this. NetBSD/hppa still claims to exist, as does the OpenBSD
equivalent. I presume its pkgsrc compiles this code. The code is basically
zero-maintenance, so there's not much to gain from deleting it preemptively.

In addition to the point Tom has made, I think it's also not correct that hppa
doesn't impose a burden: hppa is the only of our architectures that doesn't
actually support atomic operations, requiring us to have infrastructure to
backfill atomics using spinlocks. This does preclude some uses of atomics,
e.g. in signal handlers - I think Thomas wanted to do so for some concurrency
primitive.

Greetings,

Andres Freund

#9Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andres Freund (#8)
Re: Remove last traces of HPPA support

Andres Freund <andres@anarazel.de> writes:

In addition to the point Tom has made, I think it's also not correct that hppa
doesn't impose a burden: hppa is the only one of our architectures that doesn't
actually support atomic operations, requiring us to have infrastructure to
backfill atomics using spinlocks. This does preclude some uses of atomics,
e.g. in signal handlers - I think Thomas wanted to do so for some concurrency
primitive.

Hmm, are you saying there's more of port/atomics/ that could be
removed? What exactly? Do we really want to assume that all
future architectures will have atomic operations?

regards, tom lane

#10Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#9)
Re: Remove last traces of HPPA support

Hi,

On 2023-10-20 15:59:42 -0400, Tom Lane wrote:

Andres Freund <andres@anarazel.de> writes:

In addition to the point Tom has made, I think it's also not correct that hppa
doesn't impose a burden: hppa is the only one of our architectures that doesn't
actually support atomic operations, requiring us to have infrastructure to
backfill atomics using spinlocks. This does preclude some uses of atomics,
e.g. in signal handlers - I think Thomas wanted to do so for some concurrency
primitive.

Hmm, are you saying there's more of port/atomics/ that could be
removed? What exactly?

I was thinking we could remove the whole fallback path for atomic operations,
but it's a bit less, because we likely don't want to mandate support for 64bit
atomics yet. That'd still allow removing more than half of
src/include/port/atomics/fallback.h and src/backend/port/atomics.c - and more
if we finally decided to require a spinlock implementation.

Do we really want to assume that all future architectures will have atomic
operations?

Yes. Outside of the tiny microcontrollers, which obviously won't run postgres,
I cannot see any future architecture not having support for atomic operations.

Greetings,

Andres Freund

#11Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andres Freund (#10)
Re: Remove last traces of HPPA support

Andres Freund <andres@anarazel.de> writes:

On 2023-10-20 15:59:42 -0400, Tom Lane wrote:

Hmm, are you saying there's more of port/atomics/ that could be
removed? What exactly?

I was thinking we could remove the whole fallback path for atomic operations,
but it's a bit less, because we likely don't want to mandate support for 64bit
atomics yet.

Yeah. That'd be tantamount to desupporting 32-bit arches altogether,
I think. I'm not ready to go there yet.

That'd still allow removing more than half of
src/include/port/atomics/fallback.h and src/backend/port/atomics.c - and more
if we finally decided to require a spinlock implementation.

In the wake of 1c72d82c2, it seems likely that requiring some kind of
spinlock implementation is not such a big lift. Certainly, a machine
without that hasn't been a fit target for production in a very long
time, so maybe we should just drop that semaphore-based emulation.

Do we really want to assume that all future architectures will have atomic
operations?

Yes. Outside of the tiny microcontrollers, which obviously won't run postgres,
I cannot see any future architecture not having support for atomic operations.

I'd like to refine what that means a bit more. Are we assuming that
a machine providing any of the gcc atomic intrinsics (of a given
width) will provide all of them? Or is there a specific subset that
we can emulate the rest on top of?

regards, tom lane

#12Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#11)
Re: Remove last traces of HPPA support

Hi,

On 2023-10-20 17:46:59 -0400, Tom Lane wrote:

Andres Freund <andres@anarazel.de> writes:

On 2023-10-20 15:59:42 -0400, Tom Lane wrote:

Hmm, are you saying there's more of port/atomics/ that could be
removed? What exactly?

I was thinking we could remove the whole fallback path for atomic operations,
but it's a bit less, because we likely don't want to mandate support for 64bit
atomics yet.

Yeah. That'd be tantamount to desupporting 32-bit arches altogether,
I think. I'm not ready to go there yet.

It shouldn't be tantamount to that - many 32bit archs support 64bit atomic
operations. E.g. x86 supported it since the 586 (in 1993). However, arm only
addded them to 32 bit, in an extension, comparatively recently...

That'd still allow removing more than half of
src/include/port/atomics/fallback.h and src/backend/port/atomics.c - and more
if we finally decided to require a spinlock implementation.

In the wake of 1c72d82c2, it seems likely that requiring some kind of
spinlock implementation is not such a big lift. Certainly, a machine
without that hasn't been a fit target for production in a very long
time, so maybe we should just drop that semaphore-based emulation.

Yep. And the performance drop due to not having spinlock is also getting worse
over time, with CPU bound workloads having become a lot more common due to
larger amounts of memory and much much faster IO.

Do we really want to assume that all future architectures will have atomic
operations?

Yes. Outside of the tiny microcontrollers, which obviously won't run postgres,
I cannot see any future architecture not having support for atomic operations.

I'd like to refine what that means a bit more. Are we assuming that a
machine providing any of the gcc atomic intrinsics (of a given width) will
provide all of them? Or is there a specific subset that we can emulate the
rest on top of?

Right now we don't require that. As long as we know how to do atomic compare
exchange, we backfill all other atomic operations using compare-exchange -
albeit less efficiently (there's no retries for atomic-add when implemented
directly, but there are retries when using cmpxchg, the difference can be
significant under contention).

Practically speaking I think it's quite unlikely that a compiler + arch
combination will have only some intrinsics of some width - I think all
compilers have infrastructure to fall back to compare-exchange when there's no
dedicated atomic operation for some intrinsic.

Greetings,

Andres Freund

#13Noah Misch
noah@leadboat.com
In reply to: Andres Freund (#8)
Re: Remove last traces of HPPA support

On Fri, Oct 20, 2023 at 12:40:00PM -0700, Andres Freund wrote:

On 2023-10-19 17:23:04 -0700, Noah Misch wrote:

On Thu, Oct 19, 2023 at 11:16:28AM -0400, Tom Lane wrote:

We removed support for the HP-UX OS in v16, but left in support
for the PA-RISC architecture, mainly because I thought that its
spinlock mechanism is weird enough to be a good stress test
for our spinlock infrastructure. It still is that, but my
one remaining HPPA machine has gone to the great recycle heap
in the sky. There seems little point in keeping around nominal
support for an architecture that we can't test and no one is
using anymore.

Hence, the attached removes the remaining support for HPPA.
Any objections?

I wouldn't do this. NetBSD/hppa still claims to exist, as does the OpenBSD
equivalent. I presume its pkgsrc compiles this code. The code is basically
zero-maintenance, so there's not much to gain from deleting it preemptively.

In addition to the point Tom has made, I think it's also not correct that hppa
doesn't impose a burden: hppa is the only one of our architectures that doesn't
actually support atomic operations, requiring us to have infrastructure to
backfill atomics using spinlocks. This does preclude some uses of atomics,
e.g. in signal handlers - I think Thomas wanted to do so for some concurrency
primitive.

If the next thing is a patch removing half of the fallback atomics, that is a
solid reason to remove hppa. The code removed in the last proposed patch was
not that and was code that never changes, hence my reaction.

#14Tom Lane
tgl@sss.pgh.pa.us
In reply to: Noah Misch (#13)
Re: Remove last traces of HPPA support

Noah Misch <noah@leadboat.com> writes:

If the next thing is a patch removing half of the fallback atomics, that is a
solid reason to remove hppa.

Agreed, though I don't think we have a clear proposal as to what
else to remove.

The code removed in the last proposed patch was
not that and was code that never changes, hence my reaction.

Mmm ... I'd agree that the relevant stanzas of s_lock.h/.c haven't
changed in a long time, but port/atomics/ is of considerably newer
vintage and is still receiving a fair amount of churn. Moreover,
much of what I proposed to remove from there is HPPA-only code with
exactly no parallel in other arches (specifically, the bits in
atomics/fallback.h). So I don't feel comfortable that it will
continue to work without benefit of testing. We're taking a risk
just hoping that it will continue to work in the back branches until
they hit EOL. Expecting that it'll continue to work going forward,
sans testing, seems like the height of folly.

regards, tom lane

#15Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#14)
Re: Remove last traces of HPPA support

Hi,

On 2023-10-20 22:06:55 -0400, Tom Lane wrote:

Noah Misch <noah@leadboat.com> writes:

If the next thing is a patch removing half of the fallback atomics, that is a
solid reason to remove hppa.

Agreed, though I don't think we have a clear proposal as to what
else to remove.

The code removed in the last proposed patch was
not that and was code that never changes, hence my reaction.

Mmm ... I'd agree that the relevant stanzas of s_lock.h/.c haven't
changed in a long time, but port/atomics/ is of considerably newer
vintage and is still receiving a fair amount of churn. Moreover,
much of what I proposed to remove from there is HPPA-only code with
exactly no parallel in other arches (specifically, the bits in
atomics/fallback.h). So I don't feel comfortable that it will
continue to work without benefit of testing. We're taking a risk
just hoping that it will continue to work in the back branches until
they hit EOL. Expecting that it'll continue to work going forward,
sans testing, seems like the height of folly.

It'd be one thing to continue supporting an almost-guaranteed-to-be-unused
platform, if we expected it to become more popular or complete enough to be
usable like e.g. risc-v a few years ago. But I doubt we'll find anybody out
there believing that there's a potential future upward trend for HPPA.

IMO a single person looking at HPPA code for a few minutes is a cost that more
than outweighs the potential benefits of continuing "supporting" this dead
arch. Even code that doesn't need to change has costs, particularly if it's
intermingled with actually important code (which spinlocks certainly are).

Greetings,

Andres Freund

#16Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andres Freund (#15)
Re: Remove last traces of HPPA support

Andres Freund <andres@anarazel.de> writes:

It'd be one thing to continue supporting an almost-guaranteed-to-be-unused
platform, if we expected it to become more popular or complete enough to be
usable like e.g. risc-v a few years ago. But I doubt we'll find anybody out
there believing that there's a potential future upward trend for HPPA.

Indeed. I would have bet that Postgres on HPPA was extinct in the wild,
until I noticed this message a few days ago:

/messages/by-id/BYAPR02MB42624ED41C15BFA82DAE2C359BD5A@BYAPR02MB4262.namprd02.prod.outlook.com

But we already cut that user off at the knees by removing HP-UX support.

The remaining argument for worrying about this architecture being in
use in the field is the idea that somebody is using it on top of
NetBSD or OpenBSD. But having used both of those systems (or tried
to), I feel absolutely confident in asserting that nobody is using
it in production today, let alone hoping to continue using it.

IMO a single person looking at HPPA code for a few minutes is a cost that more
than outweighs the potential benefits of continuing "supporting" this dead
arch. Even code that doesn't need to change has costs, particularly if it's
intermingled with actually important code (which spinlocks certainly are).

Yup, that. It's not zero cost to carry this stuff.

regards, tom lane

#17Andres Freund
andres@anarazel.de
In reply to: Tom Lane (#16)
Re: Remove last traces of HPPA support

Hi,

On October 20, 2023 11:18:19 PM PDT, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Andres Freund <andres@anarazel.de> writes:

It'd be one thing to continue supporting an almost-guaranteed-to-be-unused
platform, if we expected it to become more popular or complete enough to be
usable like e.g. risc-v a few years ago. But I doubt we'll find anybody out
there believing that there's a potential future upward trend for HPPA.

Indeed. I would have bet that Postgres on HPPA was extinct in the wild,
until I noticed this message a few days ago:

/messages/by-id/BYAPR02MB42624ED41C15BFA82DAE2C359BD5A@BYAPR02MB4262.namprd02.prod.outlook.com

But we already cut that user off at the knees by removing HP-UX support.

Not that it matters really, but I'd assume that was hpux on ia64, not hppa?

Greetings,

Andres
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

#18Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andres Freund (#17)
Re: Remove last traces of HPPA support

Andres Freund <andres@anarazel.de> writes:

On October 20, 2023 11:18:19 PM PDT, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Indeed. I would have bet that Postgres on HPPA was extinct in the wild,
until I noticed this message a few days ago:
/messages/by-id/BYAPR02MB42624ED41C15BFA82DAE2C359BD5A@BYAPR02MB4262.namprd02.prod.outlook.com
But we already cut that user off at the knees by removing HP-UX support.

Not that it matters really, but I'd assume that was hpux on ia64, not hppa?

Hmm, maybe ... impossible to tell from the given information, but ia64
was at least still in production till recently, so you might be right.

In any case, I heard no bleating when we nuked ia64 support.

regards, tom lane

#19Noah Misch
noah@leadboat.com
In reply to: Tom Lane (#16)
Re: Remove last traces of HPPA support

On Sat, Oct 21, 2023 at 02:18:19AM -0400, Tom Lane wrote:

Andres Freund <andres@anarazel.de> writes:

It'd be one thing to continue supporting an almost-guaranteed-to-be-unused
platform, if we expected it to become more popular or complete enough to be
usable like e.g. risc-v a few years ago. But I doubt we'll find anybody out
there believing that there's a potential future upward trend for HPPA.

Indeed. I would have bet that Postgres on HPPA was extinct in the wild,
until I noticed this message a few days ago:

/messages/by-id/BYAPR02MB42624ED41C15BFA82DAE2C359BD5A@BYAPR02MB4262.namprd02.prod.outlook.com

But we already cut that user off at the knees by removing HP-UX support.

The remaining argument for worrying about this architecture being in
use in the field is the idea that somebody is using it on top of
NetBSD or OpenBSD. But having used both of those systems (or tried
to), I feel absolutely confident in asserting that nobody is using
it in production today, let alone hoping to continue using it.

IMO a single person looking at HPPA code for a few minutes is a cost that more
than outweighs the potential benefits of continuing "supporting" this dead
arch. Even code that doesn't need to change has costs, particularly if it's
intermingled with actually important code (which spinlocks certainly are).

Yup, that. It's not zero cost to carry this stuff.

+1 for dropping it.

#20Tom Lane
tgl@sss.pgh.pa.us
In reply to: Noah Misch (#19)
Re: Remove last traces of HPPA support

Noah Misch <noah@leadboat.com> writes:

On Sat, Oct 21, 2023 at 02:18:19AM -0400, Tom Lane wrote:

Andres Freund <andres@anarazel.de> writes:

IMO a single person looking at HPPA code for a few minutes is a cost that more
than outweighs the potential benefits of continuing "supporting" this dead
arch. Even code that doesn't need to change has costs, particularly if it's
intermingled with actually important code (which spinlocks certainly are).

Yup, that. It's not zero cost to carry this stuff.

+1 for dropping it.

Done at commit edadeb0710.

regards, tom lane

#21Thomas Munro
thomas.munro@gmail.com
In reply to: Tom Lane (#20)
3 attachment(s)
Re: Remove last traces of HPPA support

On Tue, Jul 2, 2024 at 5:56 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Done at commit edadeb0710.

Here are some experimental patches to try out some ideas mentioned
upthread, that are approximately unlocked by that cleanup.

1. We could get rid of --disable-spinlocks. It is hard to imagine a
hypothetical new port that would actually be a useful place to run
PostgreSQL where you can't implement spinlocks. (This one isn't
exactly unlocked by PA-RISC's departure; it's just tangled up with the
relevant cruft.)

2. We could get rid of --disable-atomics, and require at least 32 bit
lock-free (non-emulated) atomics. AFAIK there are no relevant systems
that don't have them. Hypothetical new systems would be unlikely to
omit them, unless they are eg embedded systems that don't intend to be
able to run an OS.

Personally I would like to do this, because I'd like to be able to use
pg_atomic_fetch_or_u32() in a SIGALRM handler in my
latchify-all-the-things patch (a stepping stone in the multi-threading
project as discussed in the Vancouver unconference). That's not
allowed if it might be a locking fallback. It's not strictly
necessary for my project, and I could find another way if I have to,
but when contemplating doing extra work to support imaginary computers
that I don't truly believe in... and since this general direction was
suggested already, both on this thread and in the comments in the
tree...

Once you decide to do #2, ie require atomics, perhaps you could also
implement spinlocks with them, rendering point #1 moot, and delete all
that hand-rolled TAS stuff. (Then you'd have spinlocks implemented
with flag/u32 atomics APIs, but potentially also u64 atomics
implemented with spinlocks! Circular, but not impossible AFAICT.
Assuming we can't require 64 bit lock-free atomics any time soon that
is, not considered). 🤯🤯🤯But maybe there are still good reasons to
have hand-rolled specialisations in some cases? I have not researched
that idea and eg compared the generated instructions... I do
appreciate that that code reflects a lot of accumulated wisdom and
experience that I don't claim to possess, and this bit is vapourware
anyway.

3. While tinkering with the above, and contemplating contact with
hypothetical future systems and even existing oddball systems, it
practically suggests itself that we could allow <stdatomic.h> as a way
of providing atomics (port/atomics.h clearly anticipated that, it was
just too soon). Note: that's different from requiring C11, but it
means that the new rule would be that your system should have *either*
C11 <stdatomic.h> or a hand-rolled implementation in port/atomics/*.h.
This is not a proposal, just an early stage experiment to test the
waters!

Some early thoughts about that, not fully explored:
* Since C11 uses funky generics, perhaps we might want to add some
type checks to make sure you don't accidentally confuse u32 and u64
somewhere.
* I couldn't immediately see how to use the standard atomic_flag for
our stuff due to lack of relaxed load, so it's falling back to the
generic u32 implementation (a small waste of space). atomic_bool or
atomic_char should work fine though, not tried. I guess
pg_atomic_flag might be a minor historical mistake, assuming it was
supposed to be just like the standard type of the same name. Or maybe
I'm missing something.
* The pg_spin_delay_impl() part definitely still needs hand-rolled
magic still when using <stdatomic.h> (I'm not aware of any standard
way to do that). But I'm not sure it even belongs in the "atomics"
headers anyway? It's not the same kind of thing, is it?
* The comments seem to imply that we need to tell the compiler not to
generate any code for read/write barriers on TSO systems (compiler
barrier only), but AFAICS the right thing happens anyway when coded as
standard acquire/release barriers. x86: nothing. ARM: something.
What am I missing?
* It'd be interesting to learn about anything else that modern tool
chains might do worse than our hand-rolled wisdom.
* Special support for Sun's compiler could be dropped if we could just
use their <stdatomic.h>. The same applies for MSVC 2022+ AFAICS, so
maybe in ~3 years from now we could drop the Windows-specific code.
* Uhh, yeah, so that would also apply to any modern GCC/Clang, so in
effect everyone would be using <stdatomic.h> except any hand-rolled
special bits that we decide to keep for performance reasons, and the
rest would become dead code and liable for garbage collection. So
that would amount to a confusing policy like: "we require
<stdatomic.h> with at least lock-free int in practice, but we'd
consider patches to add a non-C11-way to do this stuff if you invent a
new kind of computer/toolchain and refuse to support C11". Hmm. (I
have another version of this type of thinking happening in another
pending patch, the pg_threads.h one, more on that shortly...)

Attachments:

v1-0001-Remove-disable-spinlocks.patchtext/x-patch; charset=US-ASCII; name=v1-0001-Remove-disable-spinlocks.patchDownload
From fb79e823712b126d1701fe4aa8d8bd5c4def2e75 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 2 Jul 2024 14:47:59 +1200
Subject: [PATCH v1 1/3] Remove --disable-spinlocks.

The --disable-spinlocks build option was provided to allow new systems
unknown to s_lock.h/c to be brought up.  It is not expected to be
difficult to implement the required code for new systems.
---
 configure                               |  40 ------
 configure.ac                            |  13 --
 doc/src/sgml/installation.sgml          |  37 +----
 meson.build                             |   6 -
 src/backend/port/atomics.c              |  26 ----
 src/backend/postmaster/launch_backend.c |   9 --
 src/backend/storage/ipc/ipci.c          |  10 --
 src/backend/storage/lmgr/Makefile       |   1 -
 src/backend/storage/lmgr/meson.build    |   1 -
 src/backend/storage/lmgr/s_lock.c       |   2 +-
 src/backend/storage/lmgr/spin.c         | 180 ------------------------
 src/include/pg_config.h.in              |   3 -
 src/include/pg_config_manual.h          |  15 --
 src/include/port/atomics.h              |   4 +-
 src/include/storage/s_lock.h            |  39 +----
 src/include/storage/spin.h              |  18 +--
 src/test/regress/regress.c              |  86 -----------
 17 files changed, 10 insertions(+), 480 deletions(-)
 delete mode 100644 src/backend/storage/lmgr/spin.c

diff --git a/configure b/configure
index 76f06bd8fda..3a577f96e8f 100755
--- a/configure
+++ b/configure
@@ -836,7 +836,6 @@ enable_integer_datetimes
 enable_nls
 with_pgport
 enable_rpath
-enable_spinlocks
 enable_atomics
 enable_debug
 enable_profiling
@@ -1529,7 +1528,6 @@ Optional Features:
                           enable Native Language Support
   --disable-rpath         do not embed shared library search path in
                           executables
-  --disable-spinlocks     do not use spinlocks
   --disable-atomics       do not use atomic operations
   --enable-debug          build with debugging symbols (-g)
   --enable-profiling      build with profiling enabled
@@ -3266,33 +3264,6 @@ fi
 
 
 
-#
-# Spinlocks
-#
-
-
-# Check whether --enable-spinlocks was given.
-if test "${enable_spinlocks+set}" = set; then :
-  enableval=$enable_spinlocks;
-  case $enableval in
-    yes)
-      :
-      ;;
-    no)
-      :
-      ;;
-    *)
-      as_fn_error $? "no argument expected for --enable-spinlocks option" "$LINENO" 5
-      ;;
-  esac
-
-else
-  enable_spinlocks=yes
-
-fi
-
-
-
 #
 # Atomic operations
 #
@@ -12185,17 +12156,6 @@ fi
 
 fi
 
-if test "$enable_spinlocks" = yes; then
-
-$as_echo "#define HAVE_SPINLOCKS 1" >>confdefs.h
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING:
-*** Not using spinlocks will cause poor performance." >&5
-$as_echo "$as_me: WARNING:
-*** Not using spinlocks will cause poor performance." >&2;}
-fi
-
 if test "$enable_atomics" = yes; then
 
 $as_echo "#define HAVE_ATOMICS 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index ab2d51c21ce..2dd8c1613fb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -186,12 +186,6 @@ PGAC_ARG_BOOL(enable, rpath, yes,
               [do not embed shared library search path in executables])
 AC_SUBST(enable_rpath)
 
-#
-# Spinlocks
-#
-PGAC_ARG_BOOL(enable, spinlocks, yes,
-              [do not use spinlocks])
-
 #
 # Atomic operations
 #
@@ -1296,13 +1290,6 @@ failure.  It is possible the compiler isn't looking in the proper directory.
 Use --without-zlib to disable zlib support.])])
 fi
 
-if test "$enable_spinlocks" = yes; then
-  AC_DEFINE(HAVE_SPINLOCKS, 1, [Define to 1 if you have spinlocks.])
-else
-  AC_MSG_WARN([
-*** Not using spinlocks will cause poor performance.])
-fi
-
 if test "$enable_atomics" = yes; then
   AC_DEFINE(HAVE_ATOMICS, 1, [Define to 1 if you want to use atomics if available.])
 else
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 4784834ab9f..3f19f272b17 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1258,22 +1258,6 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
-      <varlistentry id="configure-option-disable-spinlocks">
-       <term><option>--disable-spinlocks</option></term>
-       <listitem>
-        <para>
-         Allow the build to succeed even if <productname>PostgreSQL</productname>
-         has no CPU spinlock support for the platform.  The lack of
-         spinlock support will result in very poor performance; therefore,
-         this option should only be used if the build aborts and
-         informs you that the platform lacks spinlock support. If this
-         option is required to build <productname>PostgreSQL</productname> on
-         your platform, please report the problem to the
-         <productname>PostgreSQL</productname> developers.
-        </para>
-       </listitem>
-      </varlistentry>
-
       <varlistentry id="configure-option-disable-atomics">
        <term><option>--disable-atomics</option></term>
        <listitem>
@@ -2690,23 +2674,6 @@ ninja install
       </listitem>
      </varlistentry>
 
-     <varlistentry id="configure-spinlocks-meson">
-      <term><option>-Dspinlocks={ true | false }</option></term>
-      <listitem>
-       <para>
-        This option is set to true by default; setting it to false will
-        allow the build to succeed even if <productname>PostgreSQL</productname>
-        has no CPU spinlock support for the platform.  The lack of
-        spinlock support will result in very poor performance; therefore,
-        this option should only be changed if the build aborts and
-        informs you that the platform lacks spinlock support. If setting this
-        option to false is required to build <productname>PostgreSQL</productname> on
-        your platform, please report the problem to the
-        <productname>PostgreSQL</productname> developers.
-       </para>
-      </listitem>
-     </varlistentry>
-
      <varlistentry id="configure-atomics-meson">
       <term><option>-Datomics={ true | false }</option></term>
       <listitem>
@@ -2719,6 +2686,7 @@ ninja install
        </para>
       </listitem>
      </varlistentry>
+
     </variablelist>
    </sect3>
 
@@ -3393,9 +3361,6 @@ export MANPATH
    these CPU architectures: x86, PowerPC, S/390, SPARC, ARM, MIPS,
    and RISC-V, including
    big-endian, little-endian, 32-bit, and 64-bit variants where applicable.
-   It is often
-   possible to build on an unsupported CPU type by configuring with
-   <option>--disable-spinlocks</option>, but performance will be poor.
   </para>
 
   <para>
diff --git a/meson.build b/meson.build
index 5387bb6d5fd..0d569e7a240 100644
--- a/meson.build
+++ b/meson.build
@@ -1983,12 +1983,6 @@ endif
 # Atomics
 ###############################################################
 
-if not get_option('spinlocks')
-  warning('Not using spinlocks will cause poor performance')
-else
-  cdata.set('HAVE_SPINLOCKS', 1)
-endif
-
 if not get_option('atomics')
   warning('Not using atomics will cause poor performance')
 else
diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 93789b4e058..cd7ede96726 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -57,17 +57,7 @@ pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_flag vs slock_t");
 
-#ifndef HAVE_SPINLOCKS
-
-	/*
-	 * NB: If we're using semaphore based TAS emulation, be careful to use a
-	 * separate set of semaphores. Otherwise we'd get in trouble if an atomic
-	 * var would be manipulated while spinlock is held.
-	 */
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 
 	ptr->value = false;
 }
@@ -108,15 +98,7 @@ pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_uint32 vs slock_t");
 
-	/*
-	 * If we're using semaphore based atomic flags, be careful about nested
-	 * usage of atomics while a spinlock is held.
-	 */
-#ifndef HAVE_SPINLOCKS
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 	ptr->value = val_;
 }
 
@@ -184,15 +166,7 @@ pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_uint64 vs slock_t");
 
-	/*
-	 * If we're using semaphore based atomic flags, be careful about nested
-	 * usage of atomics while a spinlock is held.
-	 */
-#ifndef HAVE_SPINLOCKS
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 	ptr->value = val_;
 }
 
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index bdfa238e4fe..04221a55e01 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -104,9 +104,6 @@ typedef struct
 	void	   *UsedShmemSegAddr;
 	slock_t    *ShmemLock;
 	struct bkend *ShmemBackendArray;
-#ifndef HAVE_SPINLOCKS
-	PGSemaphore *SpinlockSemaArray;
-#endif
 	int			NamedLWLockTrancheRequests;
 	NamedLWLockTranche *NamedLWLockTrancheArray;
 	LWLockPadded *MainLWLockArray;
@@ -722,9 +719,6 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
 	param->ShmemLock = ShmemLock;
 	param->ShmemBackendArray = ShmemBackendArray;
 
-#ifndef HAVE_SPINLOCKS
-	param->SpinlockSemaArray = SpinlockSemaArray;
-#endif
 	param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
 	param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
 	param->MainLWLockArray = MainLWLockArray;
@@ -980,9 +974,6 @@ restore_backend_variables(BackendParameters *param)
 	ShmemLock = param->ShmemLock;
 	ShmemBackendArray = param->ShmemBackendArray;
 
-#ifndef HAVE_SPINLOCKS
-	SpinlockSemaArray = param->SpinlockSemaArray;
-#endif
 	NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
 	NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
 	MainLWLockArray = param->MainLWLockArray;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 2100150f01c..921a93588f6 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -94,7 +94,6 @@ CalculateShmemSize(int *num_semaphores)
 
 	/* Compute number of semaphores we'll need */
 	numSemas = ProcGlobalSemas();
-	numSemas += SpinlockSemas();
 
 	/* Return the number of semaphores if requested by the caller */
 	if (num_semaphores)
@@ -111,7 +110,6 @@ CalculateShmemSize(int *num_semaphores)
 	 */
 	size = 100000;
 	size = add_size(size, PGSemaphoreShmemSize(numSemas));
-	size = add_size(size, SpinlockSemaSize());
 	size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
 											 sizeof(ShmemIndexEnt)));
 	size = add_size(size, dsm_estimate_size());
@@ -228,14 +226,6 @@ CreateSharedMemoryAndSemaphores(void)
 	 */
 	PGReserveSemaphores(numSemas);
 
-	/*
-	 * If spinlocks are disabled, initialize emulation layer (which depends on
-	 * semaphores, so the order is important here).
-	 */
-#ifndef HAVE_SPINLOCKS
-	SpinlockSemaInit();
-#endif
-
 	/*
 	 * Set up shared memory allocation mechanism
 	 */
diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile
index 3f89548bde6..6cbaf23b855 100644
--- a/src/backend/storage/lmgr/Makefile
+++ b/src/backend/storage/lmgr/Makefile
@@ -21,7 +21,6 @@ OBJS = \
 	predicate.o \
 	proc.o \
-	s_lock.o \
-	spin.o
+	s_lock.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/storage/lmgr/meson.build b/src/backend/storage/lmgr/meson.build
index 05ac41e809a..d43511925e1 100644
--- a/src/backend/storage/lmgr/meson.build
+++ b/src/backend/storage/lmgr/meson.build
@@ -9,5 +9,4 @@ backend_sources += files(
   'predicate.c',
   'proc.c',
   's_lock.c',
-  'spin.c',
 )
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index cba48b3e778..69549a65dba 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * s_lock.c
- *	   Hardware-dependent implementation of spinlocks.
+ *	   Implementation of spinlocks.
  *
  * When waiting for a contended spinlock we loop tightly for awhile, then
  * delay using pg_usleep() and try again.  Preferably, "awhile" should be a
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
deleted file mode 100644
index 50cb99cd3b6..00000000000
--- a/src/backend/storage/lmgr/spin.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * spin.c
- *	   Hardware-independent implementation of spinlocks.
- *
- *
- * For machines that have test-and-set (TAS) instructions, s_lock.h/.c
- * define the spinlock implementation.  This file contains only a stub
- * implementation for spinlocks using PGSemaphores.  Unless semaphores
- * are implemented in a way that doesn't involve a kernel call, this
- * is too slow to be very useful :-(
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  src/backend/storage/lmgr/spin.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "storage/pg_sema.h"
-#include "storage/shmem.h"
-#include "storage/spin.h"
-
-
-#ifndef HAVE_SPINLOCKS
-
-/*
- * No TAS, so spinlocks are implemented as PGSemaphores.
- */
-
-#ifndef HAVE_ATOMICS
-#define NUM_EMULATION_SEMAPHORES (NUM_SPINLOCK_SEMAPHORES + NUM_ATOMICS_SEMAPHORES)
-#else
-#define NUM_EMULATION_SEMAPHORES (NUM_SPINLOCK_SEMAPHORES)
-#endif							/* HAVE_ATOMICS */
-
-PGSemaphore *SpinlockSemaArray;
-
-#else							/* !HAVE_SPINLOCKS */
-
-#define NUM_EMULATION_SEMAPHORES 0
-
-#endif							/* HAVE_SPINLOCKS */
-
-/*
- * Report the amount of shared memory needed to store semaphores for spinlock
- * support.
- */
-Size
-SpinlockSemaSize(void)
-{
-	return NUM_EMULATION_SEMAPHORES * sizeof(PGSemaphore);
-}
-
-/*
- * Report number of semaphores needed to support spinlocks.
- */
-int
-SpinlockSemas(void)
-{
-	return NUM_EMULATION_SEMAPHORES;
-}
-
-#ifndef HAVE_SPINLOCKS
-
-/*
- * Initialize spinlock emulation.
- *
- * This must be called after PGReserveSemaphores().
- */
-void
-SpinlockSemaInit(void)
-{
-	PGSemaphore *spinsemas;
-	int			nsemas = SpinlockSemas();
-	int			i;
-
-	/*
-	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
-	 * ShmemAlloc() obviously can't be ready yet.
-	 */
-	spinsemas = (PGSemaphore *) ShmemAllocUnlocked(SpinlockSemaSize());
-	for (i = 0; i < nsemas; ++i)
-		spinsemas[i] = PGSemaphoreCreate();
-	SpinlockSemaArray = spinsemas;
-}
-
-/*
- * s_lock.h hardware-spinlock emulation using semaphores
- *
- * We map all spinlocks onto NUM_EMULATION_SEMAPHORES semaphores.  It's okay to
- * map multiple spinlocks onto one semaphore because no process should ever
- * hold more than one at a time.  We just need enough semaphores so that we
- * aren't adding too much extra contention from that.
- *
- * There is one exception to the restriction of only holding one spinlock at a
- * time, which is that it's ok if emulated atomic operations are nested inside
- * spinlocks. To avoid the danger of spinlocks and atomic using the same sema,
- * we make sure "normal" spinlocks and atomics backed by spinlocks use
- * distinct semaphores (see the nested argument to s_init_lock_sema).
- *
- * slock_t is just an int for this implementation; it holds the spinlock
- * number from 1..NUM_EMULATION_SEMAPHORES.  We intentionally ensure that 0
- * is not a valid value, so that testing with this code can help find
- * failures to initialize spinlocks.
- */
-
-static inline void
-s_check_valid(int lockndx)
-{
-	if (unlikely(lockndx <= 0 || lockndx > NUM_EMULATION_SEMAPHORES))
-		elog(ERROR, "invalid spinlock number: %d", lockndx);
-}
-
-void
-s_init_lock_sema(volatile slock_t *lock, bool nested)
-{
-	static uint32 counter = 0;
-	uint32		offset;
-	uint32		sema_total;
-	uint32		idx;
-
-	if (nested)
-	{
-		/*
-		 * To allow nesting atomics inside spinlocked sections, use a
-		 * different spinlock. See comment above.
-		 */
-		offset = 1 + NUM_SPINLOCK_SEMAPHORES;
-		sema_total = NUM_ATOMICS_SEMAPHORES;
-	}
-	else
-	{
-		offset = 1;
-		sema_total = NUM_SPINLOCK_SEMAPHORES;
-	}
-
-	idx = (counter++ % sema_total) + offset;
-
-	/* double check we did things correctly */
-	s_check_valid(idx);
-
-	*lock = idx;
-}
-
-void
-s_unlock_sema(volatile slock_t *lock)
-{
-	int			lockndx = *lock;
-
-	s_check_valid(lockndx);
-
-	PGSemaphoreUnlock(SpinlockSemaArray[lockndx - 1]);
-}
-
-bool
-s_lock_free_sema(volatile slock_t *lock)
-{
-	/* We don't currently use S_LOCK_FREE anyway */
-	elog(ERROR, "spin.c does not support S_LOCK_FREE()");
-	return false;
-}
-
-int
-tas_sema(volatile slock_t *lock)
-{
-	int			lockndx = *lock;
-
-	s_check_valid(lockndx);
-
-	/* Note that TAS macros return 0 if *success* */
-	return !PGSemaphoreTryLock(SpinlockSemaArray[lockndx - 1]);
-}
-
-#endif							/* !HAVE_SPINLOCKS */
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index f8d3e3b6b84..ed616278b14 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -378,9 +378,6 @@
 /* Define to 1 if the system has the type `socklen_t'. */
 #undef HAVE_SOCKLEN_T
 
-/* Define to 1 if you have spinlocks. */
-#undef HAVE_SPINLOCKS
-
 /* Define to 1 if you have the `SSL_CTX_set_cert_cb' function. */
 #undef HAVE_SSL_CTX_SET_CERT_CB
 
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index f941ee2faf8..11f74f4b56d 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -86,21 +86,5 @@
 #define USE_FLOAT8_BYVAL 1
 #endif
 
-/*
- * When we don't have native spinlocks, we use semaphores to simulate them.
- * Decreasing this value reduces consumption of OS resources; increasing it
- * may improve performance, but supplying a real spinlock implementation is
- * probably far better.
- */
-#define NUM_SPINLOCK_SEMAPHORES		128
-
-/*
- * When we have neither spinlocks nor atomic operations support we're
- * implementing atomic operations on top of spinlock on top of semaphores. To
- * be safe against atomic operations while holding a spinlock separate
- * semaphores have to be used.
- */
-#define NUM_ATOMICS_SEMAPHORES		64
-
 /*
  * MAXPGPATH: standard size of a pathname buffer in PostgreSQL (hence,
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index c911c6b9564..f92a9f62ba4 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -16,8 +16,8 @@
  *
  * There exist generic, hardware independent, implementations for several
  * compilers which might be sufficient, although possibly not optimal, for a
- * new platform. If no such generic implementation is available spinlocks (or
- * even OS provided semaphores) will be used to implement the API.
+ * new platform. If no such generic implementation is available spinlocks will
+ * be used to implement the API.
  *
  * Implement _u64 atomics if and only if your platform can use them
  * efficiently (and obviously correctly).
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 02c68513a53..e94ed5f48bd 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
  * s_lock.h
- *	   Hardware-dependent implementation of spinlocks.
+ *	   Implementation of spinlocks.
  *
  *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the hardware-independent macros in spin.h.
+ *	Call them through the macros in spin.h.
  *
  *	The following hardware-dependent macros must be provided for each
  *	supported platform:
@@ -78,13 +78,6 @@
  *	in assembly language to execute a hardware atomic-test-and-set
  *	instruction.  Equivalent OS-supplied mutex routines could be used too.
  *
- *	If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
- *	defined), then we fall back on an emulation that uses SysV semaphores
- *	(see spin.c).  This emulation will be MUCH MUCH slower than a proper TAS()
- *	implementation, because of the cost of a kernel call per lock or unlock.
- *	An old report is that Postgres spends around 40% of its time in semop(2)
- *	when using the SysV semaphore code.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -100,8 +93,6 @@
 #error "s_lock.h may not be included from frontend code"
 #endif
 
-#ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 /*************************************************************************
  * All the gcc inlines
@@ -655,34 +646,10 @@ spin_delay(void)
 
 /* Blow up if we didn't have any way to do spinlocks */
 #ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@lists.postgresql.org.
+#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-bugs@lists.postgresql.org.
 #endif
 
 
-#else	/* !HAVE_SPINLOCKS */
-
-
-/*
- * Fake spinlock implementation using semaphores --- slow and prone
- * to fall foul of kernel limits on number of semaphores, so don't use this
- * unless you must!  The subroutines appear in spin.c.
- */
-typedef int slock_t;
-
-extern bool s_lock_free_sema(volatile slock_t *lock);
-extern void s_unlock_sema(volatile slock_t *lock);
-extern void s_init_lock_sema(volatile slock_t *lock, bool nested);
-extern int	tas_sema(volatile slock_t *lock);
-
-#define S_LOCK_FREE(lock)	s_lock_free_sema(lock)
-#define S_UNLOCK(lock)	 s_unlock_sema(lock)
-#define S_INIT_LOCK(lock)	s_init_lock_sema(lock, false)
-#define TAS(lock)	tas_sema(lock)
-
-
-#endif	/* HAVE_SPINLOCKS */
-
-
 /*
  * Default Definitions - override these above as needed.
  */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index c0679c59992..3ae2a56d073 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -1,11 +1,11 @@
 /*-------------------------------------------------------------------------
  *
  * spin.h
- *	   Hardware-independent implementation of spinlocks.
+ *	   API for spinlocks.
  *
  *
- *	The hardware-independent interface to spinlocks is defined by the
- *	typedef "slock_t" and these macros:
+ *	The interface to spinlocks is defined by the typedef "slock_t" and
+ *	these macros:
  *
  *	void SpinLockInit(volatile slock_t *lock)
  *		Initialize a spinlock (to the unlocked state).
@@ -52,9 +52,6 @@
 #define SPIN_H
 
 #include "storage/s_lock.h"
-#ifndef HAVE_SPINLOCKS
-#include "storage/pg_sema.h"
-#endif
 
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
@@ -65,13 +62,4 @@
 
 #define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
-
-extern int	SpinlockSemas(void);
-extern Size SpinlockSemaSize(void);
-
-#ifndef HAVE_SPINLOCKS
-extern void SpinlockSemaInit(void);
-extern PGDLLIMPORT PGSemaphore *SpinlockSemaArray;
-#endif
-
 #endif							/* SPIN_H */
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 45a6ad3c49e..14aad5a0c6e 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -887,91 +887,7 @@ test_spinlock(void)
 		if (memcmp(struct_w_lock.data_after, "ef12", 4) != 0)
 			elog(ERROR, "padding after spinlock modified");
 	}
-
-	/*
-	 * Ensure that allocating more than INT32_MAX emulated spinlocks works.
-	 * That's interesting because the spinlock emulation uses a 32bit integer
-	 * to map spinlocks onto semaphores. There've been bugs...
-	 */
-#ifndef HAVE_SPINLOCKS
-	{
-		/*
-		 * Initialize enough spinlocks to advance counter close to wraparound.
-		 * It's too expensive to perform acquire/release for each, as those
-		 * may be syscalls when the spinlock emulation is used (and even just
-		 * atomic TAS would be expensive).
-		 */
-		for (uint32 i = 0; i < INT32_MAX - 100000; i++)
-		{
-			slock_t		lock;
-
-			SpinLockInit(&lock);
-		}
-
-		for (uint32 i = 0; i < 200000; i++)
-		{
-			slock_t		lock;
-
-			SpinLockInit(&lock);
-
-			SpinLockAcquire(&lock);
-			SpinLockRelease(&lock);
-			SpinLockAcquire(&lock);
-			SpinLockRelease(&lock);
-		}
-	}
-#endif
-}
-
-/*
- * Verify that performing atomic ops inside a spinlock isn't a
- * problem. Realistically that's only going to be a problem when both
- * --disable-spinlocks and --disable-atomics are used, but it's cheap enough
- * to just always test.
- *
- * The test works by initializing enough atomics that we'd conflict if there
- * were an overlap between a spinlock and an atomic by holding a spinlock
- * while manipulating more than NUM_SPINLOCK_SEMAPHORES atomics.
- *
- * NUM_TEST_ATOMICS doesn't really need to be more than
- * NUM_SPINLOCK_SEMAPHORES, but it seems better to test a bit more
- * extensively.
- */
-static void
-test_atomic_spin_nest(void)
-{
-	slock_t		lock;
-#define NUM_TEST_ATOMICS (NUM_SPINLOCK_SEMAPHORES + NUM_ATOMICS_SEMAPHORES + 27)
-	pg_atomic_uint32 atomics32[NUM_TEST_ATOMICS];
-	pg_atomic_uint64 atomics64[NUM_TEST_ATOMICS];
-
-	SpinLockInit(&lock);
-
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		pg_atomic_init_u32(&atomics32[i], 0);
-		pg_atomic_init_u64(&atomics64[i], 0);
-	}
-
-	/* just so it's not all zeroes */
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		EXPECT_EQ_U32(pg_atomic_fetch_add_u32(&atomics32[i], i), 0);
-		EXPECT_EQ_U64(pg_atomic_fetch_add_u64(&atomics64[i], i), 0);
-	}
-
-	/* test whether we can do atomic op with lock held */
-	SpinLockAcquire(&lock);
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		EXPECT_EQ_U32(pg_atomic_fetch_sub_u32(&atomics32[i], i), i);
-		EXPECT_EQ_U32(pg_atomic_read_u32(&atomics32[i]), 0);
-		EXPECT_EQ_U64(pg_atomic_fetch_sub_u64(&atomics64[i], i), i);
-		EXPECT_EQ_U64(pg_atomic_read_u64(&atomics64[i]), 0);
-	}
-	SpinLockRelease(&lock);
 }
-#undef NUM_TEST_ATOMICS
 
 PG_FUNCTION_INFO_V1(test_atomic_ops);
 Datum
@@ -989,8 +905,6 @@ test_atomic_ops(PG_FUNCTION_ARGS)
 	 */
 	test_spinlock();
 
-	test_atomic_spin_nest();
-
 	PG_RETURN_BOOL(true);
 }
 
-- 
2.45.2

v1-0002-Remove-disable-atomics-require-32-bit-atomics.patch (text/x-patch; charset=US-ASCII) — Download
From 42f94a323aefffb86bbe02cbb15841d0a93b0abd Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 2 Jul 2024 15:13:00 +1200
Subject: [PATCH v1 2/3] Remove --disable-atomics, require 32 bit atomics.

All target systems have real atomics support.  We're not yet ready to
use C11 <stdatomic.h>, but since edadeb07 there is no remaining reason
to carry code that simulates atomic flag and uint32 imperfectly with
spinlocks.
---
 configure                                 |  40 --------
 configure.ac                              |  13 ---
 doc/src/sgml/installation.sgml            |  25 -----
 meson.build                               |  65 ++++++-------
 src/backend/port/atomics.c                | 109 ----------------------
 src/include/pg_config.h.in                |   3 -
 src/include/port/atomics.h                |   7 +-
 src/include/port/atomics/arch-x86.h       |   8 --
 src/include/port/atomics/fallback.h       |  87 +----------------
 src/include/port/atomics/generic-gcc.h    |   4 -
 src/include/port/atomics/generic-msvc.h   |   4 -
 src/include/port/atomics/generic-sunpro.h |   8 --
 12 files changed, 32 insertions(+), 341 deletions(-)

diff --git a/configure b/configure
index 3a577f96e8f..4c73e46e246 100755
--- a/configure
+++ b/configure
@@ -836,7 +836,6 @@ enable_integer_datetimes
 enable_nls
 with_pgport
 enable_rpath
-enable_atomics
 enable_debug
 enable_profiling
 enable_coverage
@@ -1528,7 +1527,6 @@ Optional Features:
                           enable Native Language Support
   --disable-rpath         do not embed shared library search path in
                           executables
-  --disable-atomics       do not use atomic operations
   --enable-debug          build with debugging symbols (-g)
   --enable-profiling      build with profiling enabled
   --enable-coverage       build with coverage testing instrumentation
@@ -3264,33 +3262,6 @@ fi
 
 
 
-#
-# Atomic operations
-#
-
-
-# Check whether --enable-atomics was given.
-if test "${enable_atomics+set}" = set; then :
-  enableval=$enable_atomics;
-  case $enableval in
-    yes)
-      :
-      ;;
-    no)
-      :
-      ;;
-    *)
-      as_fn_error $? "no argument expected for --enable-atomics option" "$LINENO" 5
-      ;;
-  esac
-
-else
-  enable_atomics=yes
-
-fi
-
-
-
 #
 # --enable-debug adds -g to compiler flags
 #
@@ -12156,17 +12127,6 @@ fi
 
 fi
 
-if test "$enable_atomics" = yes; then
-
-$as_echo "#define HAVE_ATOMICS 1" >>confdefs.h
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING:
-*** Not using atomic operations will cause poor performance." >&5
-$as_echo "$as_me: WARNING:
-*** Not using atomic operations will cause poor performance." >&2;}
-fi
-
 if test "$with_gssapi" = yes ; then
   if test "$PORTNAME" != "win32"; then
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing gss_store_cred_into" >&5
diff --git a/configure.ac b/configure.ac
index 2dd8c1613fb..8ee3d2cabb9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -186,12 +186,6 @@ PGAC_ARG_BOOL(enable, rpath, yes,
               [do not embed shared library search path in executables])
 AC_SUBST(enable_rpath)
 
-#
-# Atomic operations
-#
-PGAC_ARG_BOOL(enable, atomics, yes,
-              [do not use atomic operations])
-
 #
 # --enable-debug adds -g to compiler flags
 #
@@ -1290,13 +1284,6 @@ failure.  It is possible the compiler isn't looking in the proper directory.
 Use --without-zlib to disable zlib support.])])
 fi
 
-if test "$enable_atomics" = yes; then
-  AC_DEFINE(HAVE_ATOMICS, 1, [Define to 1 if you want to use atomics if available.])
-else
-  AC_MSG_WARN([
-*** Not using atomic operations will cause poor performance.])
-fi
-
 if test "$with_gssapi" = yes ; then
   if test "$PORTNAME" != "win32"; then
     AC_SEARCH_LIBS(gss_store_cred_into, [gssapi_krb5 gss 'gssapi -lkrb5 -lcrypto'], [],
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 3f19f272b17..4ab8ddba7c1 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1258,18 +1258,6 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
-      <varlistentry id="configure-option-disable-atomics">
-       <term><option>--disable-atomics</option></term>
-       <listitem>
-        <para>
-         Disable use of CPU atomic operations.  This option does nothing on
-         platforms that lack such operations.  On platforms that do have
-         them, this will result in poor performance.  This option is only
-         useful for debugging or making performance comparisons.
-        </para>
-       </listitem>
-      </varlistentry>
-
      </variablelist>
 
    </sect3>
@@ -2674,19 +2662,6 @@ ninja install
       </listitem>
      </varlistentry>
 
-     <varlistentry id="configure-atomics-meson">
-      <term><option>-Datomics={ true | false }</option></term>
-      <listitem>
-       <para>
-        This option is set to true by default; setting it to false will
-        disable use of CPU atomic operations.  The option does nothing on
-        platforms that lack such operations.  On platforms that do have
-        them, disabling atomics will result in poor performance.  Changing
-        this option is only useful for debugging or making performance comparisons.
-       </para>
-      </listitem>
-     </varlistentry>
-
     </variablelist>
    </sect3>
 
diff --git a/meson.build b/meson.build
index 0d569e7a240..a382119528c 100644
--- a/meson.build
+++ b/meson.build
@@ -1983,70 +1983,61 @@ endif
 # Atomics
 ###############################################################
 
-if not get_option('atomics')
-  warning('Not using atomics will cause poor performance')
-else
-  # XXX: perhaps we should require some atomics support in this case these
-  # days?
-  cdata.set('HAVE_ATOMICS', 1)
-
-  atomic_checks = [
-    {'name': 'HAVE_GCC__SYNC_CHAR_TAS',
-     'desc': '__sync_lock_test_and_set(char)',
-     'test': '''
+atomic_checks = [
+  {'name': 'HAVE_GCC__SYNC_CHAR_TAS',
+   'desc': '__sync_lock_test_and_set(char)',
+   'test': '''
 char lock = 0;
 __sync_lock_test_and_set(&lock, 1);
 __sync_lock_release(&lock);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT32_TAS',
-     'desc': '__sync_lock_test_and_set(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT32_TAS',
+   'desc': '__sync_lock_test_and_set(int32)',
+   'test': '''
 int lock = 0;
 __sync_lock_test_and_set(&lock, 1);
 __sync_lock_release(&lock);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT32_CAS',
-     'desc': '__sync_val_compare_and_swap(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT32_CAS',
+   'desc': '__sync_val_compare_and_swap(int32)',
+   'test': '''
 int val = 0;
 __sync_val_compare_and_swap(&val, 0, 37);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT64_CAS',
-     'desc': '__sync_val_compare_and_swap(int64)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT64_CAS',
+   'desc': '__sync_val_compare_and_swap(int64)',
+   'test': '''
 INT64 val = 0;
 __sync_val_compare_and_swap(&val, 0, 37);'''},
 
-    {'name': 'HAVE_GCC__ATOMIC_INT32_CAS',
-     'desc': ' __atomic_compare_exchange_n(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__ATOMIC_INT32_CAS',
+   'desc': ' __atomic_compare_exchange_n(int32)',
+   'test': '''
 int val = 0;
 int expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
 
-    {'name': 'HAVE_GCC__ATOMIC_INT64_CAS',
-     'desc': ' __atomic_compare_exchange_n(int64)',
-     'test': '''
+  {'name': 'HAVE_GCC__ATOMIC_INT64_CAS',
+   'desc': ' __atomic_compare_exchange_n(int64)',
+   'test': '''
 INT64 val = 0;
 INT64 expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
-  ]
+]
 
-  foreach check : atomic_checks
-    test = '''
+foreach check : atomic_checks
+  test = '''
 int main(void)
 {
 @0@
 }'''.format(check['test'])
 
-    cdata.set(check['name'],
-      cc.links(test,
-        name: check['desc'],
-        args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))]) ? 1 : false
-    )
-  endforeach
-
-endif
+  cdata.set(check['name'],
+    cc.links(test,
+      name: check['desc'],
+      args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))]) ? 1 : false
+  )
+endforeach
 
 
 ###############################################################
diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index cd7ede96726..6f1e014d0b8 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -49,115 +49,6 @@ pg_extern_compiler_barrier(void)
 #endif
 
 
-#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
-
-void
-pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
-					 "size mismatch of atomic_flag vs slock_t");
-
-	SpinLockInit((slock_t *) &ptr->sema);
-
-	ptr->value = false;
-}
-
-bool
-pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	uint32		oldval;
-
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	oldval = ptr->value;
-	ptr->value = true;
-	SpinLockRelease((slock_t *) &ptr->sema);
-
-	return oldval == 0;
-}
-
-void
-pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	ptr->value = false;
-	SpinLockRelease((slock_t *) &ptr->sema);
-}
-
-bool
-pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	return ptr->value == 0;
-}
-
-#endif							/* PG_HAVE_ATOMIC_FLAG_SIMULATION */
-
-#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
-void
-pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
-{
-	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
-					 "size mismatch of atomic_uint32 vs slock_t");
-
-	SpinLockInit((slock_t *) &ptr->sema);
-	ptr->value = val_;
-}
-
-void
-pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
-{
-	/*
-	 * One might think that an unlocked write doesn't need to acquire the
-	 * spinlock, but one would be wrong. Even an unlocked write has to cause a
-	 * concurrent pg_atomic_compare_exchange_u32() (et al) to fail.
-	 */
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	ptr->value = val;
-	SpinLockRelease((slock_t *) &ptr->sema);
-}
-
-bool
-pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-									uint32 *expected, uint32 newval)
-{
-	bool		ret;
-
-	/*
-	 * Do atomic op under a spinlock. It might look like we could just skip
-	 * the cmpxchg if the lock isn't available, but that'd just emulate a
-	 * 'weak' compare and swap. I.e. one that allows spurious failures. Since
-	 * several algorithms rely on a strong variant and that is efficiently
-	 * implementable on most major architectures let's emulate it here as
-	 * well.
-	 */
-	SpinLockAcquire((slock_t *) &ptr->sema);
-
-	/* perform compare/exchange logic */
-	ret = ptr->value == *expected;
-	*expected = ptr->value;
-	if (ret)
-		ptr->value = newval;
-
-	/* and release lock */
-	SpinLockRelease((slock_t *) &ptr->sema);
-
-	return ret;
-}
-
-uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-	uint32		oldval;
-
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	oldval = ptr->value;
-	ptr->value += add_;
-	SpinLockRelease((slock_t *) &ptr->sema);
-	return oldval;
-}
-
-#endif							/* PG_HAVE_ATOMIC_U32_SIMULATION */
-
-
 #ifdef PG_HAVE_ATOMIC_U64_SIMULATION
 
 void
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index ed616278b14..bf57690af5b 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -57,9 +57,6 @@
 /* Define to 1 if you have the `ASN1_STRING_get0_data' function. */
 #undef HAVE_ASN1_STRING_GET0_DATA
 
-/* Define to 1 if you want to use atomics if available. */
-#undef HAVE_ATOMICS
-
 /* Define to 1 if you have the <atomic.h> header file. */
 #undef HAVE_ATOMIC_H
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index f92a9f62ba4..a6360f021fd 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -17,7 +17,7 @@
  * There exist generic, hardware independent, implementations for several
  * compilers which might be sufficient, although possibly not optimal, for a
  * new platform. If no such generic implementation is available spinlocks will
- * be used to implement the API.
+ * be used to implement the 64-bit parts of the API.
  *
  * Implement _u64 atomics if and only if your platform can use them
  * efficiently (and obviously correctly).
@@ -91,10 +91,7 @@
 #elif defined(__SUNPRO_C) && !defined(__GNUC__)
 #include "port/atomics/generic-sunpro.h"
 #else
-/*
- * Unsupported compiler, we'll likely use slower fallbacks... At least
- * compiler barriers should really be provided.
- */
+#error "unknown compiler, so required atomic type support is missing"
 #endif
 
 /*
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 3efa79dc3df..30e4c8ea697 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -49,8 +49,6 @@
  * nice to support older gcc's and the compare/exchange implementation here is
  * actually more efficient than the * __sync variant.
  */
-#if defined(HAVE_ATOMICS)
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_FLAG_SUPPORT
@@ -80,8 +78,6 @@ typedef struct pg_atomic_uint64
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
-#endif /* defined(HAVE_ATOMICS) */
-
 #if !defined(PG_HAVE_SPIN_DELAY)
 /*
  * This sequence is equivalent to the PAUSE instruction ("rep" is
@@ -132,8 +128,6 @@ pg_spin_delay_impl(void)
 #endif /* !defined(PG_HAVE_SPIN_DELAY) */
 
 
-#if defined(HAVE_ATOMICS)
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
@@ -248,5 +242,3 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
 #endif /* 8 byte single-copy atomicity */
-
-#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 34cfee110fb..d169f675b07 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * fallback.h
- *    Fallback for platforms without spinlock and/or atomics support. Slower
+ *    Fallback for platforms without 64 bit atomics support. Slower
  *    than native atomics support, but not unusably slow.
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
@@ -51,50 +51,6 @@ extern void pg_extern_compiler_barrier(void);
 #endif
 
 
-/*
- * If we have atomics implementation for this platform, fall back to providing
- * the atomics API using a spinlock to protect the internal state. Possibly
- * the spinlock implementation uses semaphores internally...
- *
- * We have to be a bit careful here, as it's not guaranteed that atomic
- * variables are mapped to the same address in every process (e.g. dynamic
- * shared memory segments). We can't just hash the address and use that to map
- * to a spinlock. Instead assign a spinlock on initialization of the atomic
- * variable.
- */
-#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
-
-#define PG_HAVE_ATOMIC_FLAG_SIMULATION
-#define PG_HAVE_ATOMIC_FLAG_SUPPORT
-
-typedef struct pg_atomic_flag
-{
-	/*
-	 * To avoid circular includes we can't use s_lock as a type here. Instead
-	 * just reserve enough space for all spinlock types. Some platforms would
-	 * be content with just one byte instead of 4, but that's not too much
-	 * waste.
-	 */
-	int			sema;
-	volatile bool value;
-} pg_atomic_flag;
-
-#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */
-
-#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
-
-#define PG_HAVE_ATOMIC_U32_SIMULATION
-
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-	/* Check pg_atomic_flag's definition above for an explanation */
-	int			sema;
-	volatile uint32 value;
-} pg_atomic_uint32;
-
-#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */
-
 #if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
 
 #define PG_HAVE_ATOMIC_U64_SIMULATION
@@ -102,49 +58,10 @@ typedef struct pg_atomic_uint32
 #define PG_HAVE_ATOMIC_U64_SUPPORT
 typedef struct pg_atomic_uint64
 {
-	/* Check pg_atomic_flag's definition above for an explanation */
 	int			sema;
 	volatile uint64 value;
 } pg_atomic_uint64;
 
-#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
-
-#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
-
-#define PG_HAVE_ATOMIC_INIT_FLAG
-extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_TEST_SET_FLAG
-extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_CLEAR_FLAG
-extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
-extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr);
-
-#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */
-
-#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
-
-#define PG_HAVE_ATOMIC_INIT_U32
-extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_);
-
-#define PG_HAVE_ATOMIC_WRITE_U32
-extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val);
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-												uint32 *expected, uint32 newval);
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_);
-
-#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
-
-
-#ifdef PG_HAVE_ATOMIC_U64_SIMULATION
-
 #define PG_HAVE_ATOMIC_INIT_U64
 extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_);
 
@@ -155,4 +72,4 @@ extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
 extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_);
 
-#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index 9d91370fa8c..f911cef65ab 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -53,8 +53,6 @@
 #endif
 
 
-#ifdef HAVE_ATOMICS
-
 /* generic gcc based atomic flag implementation */
 #if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \
 	&& (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS))
@@ -316,5 +314,3 @@ pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
 #endif
 
 #endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */
-
-#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h
index c013aca5e7c..677436f2601 100644
--- a/src/include/port/atomics/generic-msvc.h
+++ b/src/include/port/atomics/generic-msvc.h
@@ -30,8 +30,6 @@
 #define pg_memory_barrier_impl()	MemoryBarrier()
 #endif
 
-#if defined(HAVE_ATOMICS)
-
 #define PG_HAVE_ATOMIC_U32_SUPPORT
 typedef struct pg_atomic_uint32
 {
@@ -115,5 +113,3 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 }
 
 #endif /* _WIN64 */
-
-#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h
index e060c0868a9..aa81ab0df89 100644
--- a/src/include/port/atomics/generic-sunpro.h
+++ b/src/include/port/atomics/generic-sunpro.h
@@ -17,8 +17,6 @@
  * -------------------------------------------------------------------------
  */
 
-#if defined(HAVE_ATOMICS)
-
 #ifdef HAVE_MBARRIER_H
 #include <mbarrier.h>
 
@@ -66,10 +64,6 @@ typedef struct pg_atomic_uint64
 
 #endif /* HAVE_ATOMIC_H */
 
-#endif /* defined(HAVE_ATOMICS) */
-
-
-#if defined(HAVE_ATOMICS)
 
 #ifdef HAVE_ATOMIC_H
 
@@ -116,5 +110,3 @@ pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
 }
 
 #endif /* HAVE_ATOMIC_H */
-
-#endif /* defined(HAVE_ATOMICS) */
-- 
2.45.2

v1-0003-Optionally-do-port-atomics.h-with-stdatomic.h.patch (text/x-patch; charset=US-ASCII) — Download
From 5260342a16abe91bd7c1b426423a4f2591cbda73 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 2 Jul 2024 22:31:42 +1200
Subject: [PATCH v1 3/3] Optionally do port/atomics.h with <stdatomic.h>.

Implement port/atomics.h's facilities directly using C11 standard
facilities, if available.  In practice, every modern system has it, but
we don't require C11 yet, so this is done only if a configure time check
finds the header.

XXX Not yet handled: pg_spin_delay().  But why should that be tangled up
with the atomics headers?
XXX Are the barriers or any other operations less efficient than the
hand-crafted stuff?
---
 configure                  |   2 +-
 configure.ac               |   1 +
 meson.build                |   1 +
 src/include/pg_config.h.in |   3 +
 src/include/port/atomics.h | 121 +++++++++++++++++++++++++++++++++++--
 5 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 4c73e46e246..77a18ce1c72 100755
--- a/configure
+++ b/configure
@@ -13257,7 +13257,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
 fi
 
 
-for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h ucred.h
+for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h stdatomic.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h ucred.h
 do :
   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
diff --git a/configure.ac b/configure.ac
index 8ee3d2cabb9..06ccb16085c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1440,6 +1440,7 @@ AC_CHECK_HEADERS(m4_normalize([
 	ifaddrs.h
 	langinfo.h
 	mbarrier.h
+	stdatomic.h
 	sys/epoll.h
 	sys/event.h
 	sys/personality.h
diff --git a/meson.build b/meson.build
index a382119528c..ce1062729da 100644
--- a/meson.build
+++ b/meson.build
@@ -2280,6 +2280,7 @@ header_checks = [
   'ifaddrs.h',
   'langinfo.h',
   'mbarrier.h',
+  'stdatomic.h',
   'stdbool.h',
   'strings.h',
   'sys/epoll.h',
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index bf57690af5b..150eefc666d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -378,6 +378,9 @@
 /* Define to 1 if you have the `SSL_CTX_set_cert_cb' function. */
 #undef HAVE_SSL_CTX_SET_CERT_CB
 
+/* Define to 1 if you have the <stdatomic.h> header file. */
+#undef HAVE_STDATOMIC_H
+
 /* Define to 1 if stdbool.h conforms to C99. */
 #undef HAVE_STDBOOL_H
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index a6360f021fd..533c10a2106 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -3,9 +3,13 @@
  * atomics.h
  *	  Atomic operations.
  *
- * Hardware and compiler dependent functions for manipulating memory
- * atomically and dealing with cache coherency. Used to implement locking
- * facilities and lockless algorithms/data structures.
+ * If C11 <stdatomic.h> is available, this header just maps pg_XXX names onto
+ * the standard interfaces. Otherwise, for strict C99 environments, hardware-
+ * and compiler-dependent implementation functions are provided.
+ *
+ * These interfaces are for manipulating memory atomically and dealing with
+ * cache coherency. They can be used to implement locking facilities and
+ * lockless algorithms/data structures.
  *
  * To bring up postgres on a platform/compiler at the very least
  * implementations for the following operations should be provided:
@@ -46,6 +50,113 @@
 
 #include <limits.h>
 
+#ifdef HAVE_STDATOMIC_H
+
+/* Map pg_ atomic interfaces directly to standard C11 interfaces. */
+
+#include <stdatomic.h>
+
+/* Prevent compiler re-ordering and control memory ordering. */
+#define pg_memory_barrier_impl() atomic_thread_fence(memory_order_seq_cst)
+#define pg_read_barrier_impl()  atomic_thread_fence(memory_order_acquire)
+#define pg_write_barrier_impl() atomic_thread_fence(memory_order_release)
+
+/* Prevent compiler re-ordering, but don't generate any code. */
+#define pg_compiler_barrier_impl() atomic_signal_fence(memory_order_seq_cst)
+
+/*
+ * We don't map pg_atomic_flag to standard atomic_flag, because that can't
+ * implement pg_atomic_unlocked_test_flag()'s relaxed load.  So we'll just let
+ * generic.h provide an implementation on top of pg_atomic_uint32.
+ */
+
+/*
+ * For pg_atomic_uint32, we require a real lock-free uint32, not one that is
+ * emulated with locks by the compiler or runtime library.
+ */
+#if ATOMIC_INT_LOCK_FREE < 2
+#error atomic_uint is not always lock-free
+#endif
+typedef atomic_uint pg_atomic_uint32;
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+#define pg_atomic_init_u32_impl(x, v) atomic_init((x), (v))
+#define PG_HAVE_ATOMIC_INIT_U32
+#define pg_atomic_read_u32_impl(x) *(x)
+#define PG_HAVE_ATOMIC_READ_U32
+#define pg_atomic_write_u32_impl(x, v) *(x) = (v)
+#define PG_HAVE_ATOMIC_WRITE_U32
+#define pg_atomic_unlocked_write_u32_impl(x, v) *(x) = (v)
+#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32
+#define pg_atomic_exchange_u32_impl atomic_exchange
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+#define pg_atomic_compare_exchange_u32_impl atomic_compare_exchange_strong
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+#define pg_atomic_fetch_add_u32_impl atomic_fetch_add
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+#define pg_atomic_fetch_sub_u32_impl atomic_fetch_sub
+#define PG_HAVE_ATOMIC_FETCH_SUB_U32
+#define pg_atomic_fetch_or_u32_impl atomic_fetch_or
+#define PG_HAVE_ATOMIC_FETCH_OR_U32
+#define pg_atomic_fetch_and_u32_impl atomic_fetch_and
+#define PG_HAVE_ATOMIC_FETCH_AND_U32
+#define pg_atomic_fetch_xor_u32_impl atomic_fetch_xor
+#define PG_HAVE_ATOMIC_FETCH_XOR_U32
+
+/*
+ * Does this system also have a 64 bit atomic type that is lock-free?  All
+ * modern systems should, but if not, we'll supply our own lock-based
+ * emulation in fallback.h instead of relying on libc's lock-based emulation.
+ * That reduces the number of possible combinations of behavior on rare
+ * systems.
+ */
+#if defined(DEBUG_NO_ATOMIC_64)
+/* developer-only macro used to force fallback code to be used */
+#elif SIZEOF_LONG == 8 && ATOMIC_LONG_LOCK_FREE > 1
+typedef atomic_ulong pg_atomic_uint64;
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+#elif ATOMIC_LONG_LONG_LOCK_FREE > 1
+typedef atomic_ulonglong pg_atomic_uint64;
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+#endif
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+#define pg_atomic_init_u64_impl(x, v) atomic_init((x), (v))
+#define PG_HAVE_ATOMIC_INIT_U64
+#define pg_atomic_read_u64_impl(x) *(x)
+#define PG_HAVE_ATOMIC_READ_U64
+#define pg_atomic_write_u64_impl(x, v) *(x) = (v)
+#define PG_HAVE_ATOMIC_WRITE_U64
+#define pg_atomic_unlocked_write_u64_impl(x, v) *(x) = (v)
+#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U64
+#define pg_atomic_exchange_u64_impl atomic_exchange
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+#define pg_atomic_compare_exchange_u64_impl atomic_compare_exchange_strong
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+#define pg_atomic_fetch_add_u64_impl atomic_fetch_add
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+#define pg_atomic_fetch_sub_u64_impl atomic_fetch_sub
+#define PG_HAVE_ATOMIC_FETCH_SUB_U64
+#define pg_atomic_fetch_or_u64_impl atomic_fetch_or
+#define PG_HAVE_ATOMIC_FETCH_OR_U64
+#define pg_atomic_fetch_and_u64_impl atomic_fetch_and
+#define PG_HAVE_ATOMIC_FETCH_AND_U64
+#define pg_atomic_fetch_xor_u64_impl atomic_fetch_xor
+#define PG_HAVE_ATOMIC_FETCH_XOR_U64
+#endif
+
+/*
+ * XXX TODO: we need to get the pg_spin_delay_impl from arch-specific files,
+ * but we don't want anything else from them.  But really, why is that tangled
+ * up with atomics?
+ */
+
+#else
+
+/*
+ * This system doesn't have <stdatomic.h> yet, so we'll use hand-rolled
+ * implementations using compiler- and architecture-specific knowledge.
+ */
+
 /*
  * First a set of architecture specific files is included.
  *
@@ -91,9 +202,11 @@
 #elif defined(__SUNPRO_C) && !defined(__GNUC__)
 #include "port/atomics/generic-sunpro.h"
 #else
-#error "unknown compiler, so required atomic type support is missing"
+#error "no <stdatomic.h> and unknown compiler, so required atomic type support is missing"
 #endif
 
+#endif /* !HAVE_STDATOMIC_H */
+
 /*
  * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and
  * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics
-- 
2.45.2

#22Tom Lane
tgl@sss.pgh.pa.us
In reply to: Thomas Munro (#21)
Re: Remove last traces of HPPA support

Thomas Munro <thomas.munro@gmail.com> writes:

Here are some experimental patches to try out some ideas mentioned
upthread, that are approximately unlocked by that cleanup.

FWIW, I'm good with getting rid of --disable-spinlocks and
--disable-atomics. That's a fair amount of code and needing to
support it causes problems, as you say. I am very much less
excited about ripping out our spinlock and/or atomics code in favor
of <stdatomic.h>; I just don't see the gain there, and I do see risk
in ceding control of the semantics and performance of those
primitives.

regards, tom lane

#23Thomas Munro
thomas.munro@gmail.com
In reply to: Tom Lane (#22)
4 attachment(s)
Re: Remove last traces of HPPA support

On Wed, Jul 3, 2024 at 8:09 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Thomas Munro <thomas.munro@gmail.com> writes:

Here are some experimental patches to try out some ideas mentioned
upthread, that are approximately unlocked by that cleanup.

FWIW, I'm good with getting rid of --disable-spinlocks and
--disable-atomics. That's a fair amount of code and needing to
support it causes problems, as you say. I am very much less
excited about ripping out our spinlock and/or atomics code in favor
of <stdatomic.h>; I just don't see the gain there, and I do see risk
in ceding control of the semantics and performance of those
primitives.

OK, <stdatomic.h> part on ice for now. Here's an update of the rest,
this time also removing the barrier fallbacks as discussed in the LTO
thread[1].

I guess we should also consider reimplementing the spinlock on the
atomic API, but I can see that Andres is poking at spinlock code right
now so I'll keep out of his way...

Side issue: I noticed via CI failure when I tried to require
read/write barriers to be provided (a choice I backed out of), that on
MSVC we seem to be using the full memory barrier fallback for those.
Huh? For x86, I think they should be using pg_compiler_barrier() (no
code gen, just prevent reordering), not pg_pg_memory_barrier(), no?
Perhaps I'm missing something but I suspect we might be failing to
include arch-x86.h on that compiler when we should... maybe it needs
to detect _M_AMD64 too? For ARM, from a quick look, the only way to
reach real acquire/release barriers seems to be to use the C11
interface (which would also be fine on x86 where it should degrade to
a no-op compiler barrier or signal fence as the standard calls it),
but IIRC the Windows/ARM basics haven't gone in yet anyway.

[1]: /messages/by-id/721bf39a-ed8a-44b0-8b8e-be3bd81db748@technowledgy.de

Attachments:

v2-0001-Remove-disable-spinlocks.patchtext/x-patch; charset=US-ASCII; name=v2-0001-Remove-disable-spinlocks.patchDownload
From 33533817949052f7af423aaee0ef6e737031effb Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 2 Jul 2024 14:47:59 +1200
Subject: [PATCH v2 1/4] Remove --disable-spinlocks.

A later change will require atomic support, so it wouldn't make sense
for a new system not to be able to implement true spinlocks.

Discussion: https://postgr.es/m/3351991.1697728588%40sss.pgh.pa.us
---
 configure                               |  40 ------
 configure.ac                            |  13 --
 doc/src/sgml/installation.sgml          |  37 +----
 meson.build                             |   6 -
 src/backend/port/atomics.c              |  26 ----
 src/backend/port/posix_sema.c           |   3 +-
 src/backend/port/sysv_sema.c            |   3 +-
 src/backend/postmaster/launch_backend.c |   8 --
 src/backend/storage/ipc/ipci.c          |  10 --
 src/backend/storage/lmgr/Makefile       |   1 -
 src/backend/storage/lmgr/meson.build    |   1 -
 src/backend/storage/lmgr/s_lock.c       |   2 +-
 src/backend/storage/lmgr/spin.c         | 180 ------------------------
 src/include/pg_config.h.in              |   3 -
 src/include/pg_config_manual.h          |  15 --
 src/include/port/atomics.h              |   4 +-
 src/include/port/atomics/fallback.h     |   4 +-
 src/include/storage/s_lock.h            |  39 +----
 src/include/storage/spin.h              |  18 +--
 src/test/regress/regress.c              |  86 -----------
 20 files changed, 13 insertions(+), 486 deletions(-)
 delete mode 100644 src/backend/storage/lmgr/spin.c

diff --git a/configure b/configure
index ea5514fab1..f8deaa8d78 100755
--- a/configure
+++ b/configure
@@ -836,7 +836,6 @@ enable_integer_datetimes
 enable_nls
 with_pgport
 enable_rpath
-enable_spinlocks
 enable_atomics
 enable_debug
 enable_profiling
@@ -1529,7 +1528,6 @@ Optional Features:
                           enable Native Language Support
   --disable-rpath         do not embed shared library search path in
                           executables
-  --disable-spinlocks     do not use spinlocks
   --disable-atomics       do not use atomic operations
   --enable-debug          build with debugging symbols (-g)
   --enable-profiling      build with profiling enabled
@@ -3266,33 +3264,6 @@ fi
 
 
 
-#
-# Spinlocks
-#
-
-
-# Check whether --enable-spinlocks was given.
-if test "${enable_spinlocks+set}" = set; then :
-  enableval=$enable_spinlocks;
-  case $enableval in
-    yes)
-      :
-      ;;
-    no)
-      :
-      ;;
-    *)
-      as_fn_error $? "no argument expected for --enable-spinlocks option" "$LINENO" 5
-      ;;
-  esac
-
-else
-  enable_spinlocks=yes
-
-fi
-
-
-
 #
 # Atomic operations
 #
@@ -12185,17 +12156,6 @@ fi
 
 fi
 
-if test "$enable_spinlocks" = yes; then
-
-$as_echo "#define HAVE_SPINLOCKS 1" >>confdefs.h
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING:
-*** Not using spinlocks will cause poor performance." >&5
-$as_echo "$as_me: WARNING:
-*** Not using spinlocks will cause poor performance." >&2;}
-fi
-
 if test "$enable_atomics" = yes; then
 
 $as_echo "#define HAVE_ATOMICS 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index 0089e78b68..a72169f574 100644
--- a/configure.ac
+++ b/configure.ac
@@ -186,12 +186,6 @@ PGAC_ARG_BOOL(enable, rpath, yes,
               [do not embed shared library search path in executables])
 AC_SUBST(enable_rpath)
 
-#
-# Spinlocks
-#
-PGAC_ARG_BOOL(enable, spinlocks, yes,
-              [do not use spinlocks])
-
 #
 # Atomic operations
 #
@@ -1296,13 +1290,6 @@ failure.  It is possible the compiler isn't looking in the proper directory.
 Use --without-zlib to disable zlib support.])])
 fi
 
-if test "$enable_spinlocks" = yes; then
-  AC_DEFINE(HAVE_SPINLOCKS, 1, [Define to 1 if you have spinlocks.])
-else
-  AC_MSG_WARN([
-*** Not using spinlocks will cause poor performance.])
-fi
-
 if test "$enable_atomics" = yes; then
   AC_DEFINE(HAVE_ATOMICS, 1, [Define to 1 if you want to use atomics if available.])
 else
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 4784834ab9..3f19f272b1 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1258,22 +1258,6 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
-      <varlistentry id="configure-option-disable-spinlocks">
-       <term><option>--disable-spinlocks</option></term>
-       <listitem>
-        <para>
-         Allow the build to succeed even if <productname>PostgreSQL</productname>
-         has no CPU spinlock support for the platform.  The lack of
-         spinlock support will result in very poor performance; therefore,
-         this option should only be used if the build aborts and
-         informs you that the platform lacks spinlock support. If this
-         option is required to build <productname>PostgreSQL</productname> on
-         your platform, please report the problem to the
-         <productname>PostgreSQL</productname> developers.
-        </para>
-       </listitem>
-      </varlistentry>
-
       <varlistentry id="configure-option-disable-atomics">
        <term><option>--disable-atomics</option></term>
        <listitem>
@@ -2690,23 +2674,6 @@ ninja install
       </listitem>
      </varlistentry>
 
-     <varlistentry id="configure-spinlocks-meson">
-      <term><option>-Dspinlocks={ true | false }</option></term>
-      <listitem>
-       <para>
-        This option is set to true by default; setting it to false will
-        allow the build to succeed even if <productname>PostgreSQL</productname>
-        has no CPU spinlock support for the platform.  The lack of
-        spinlock support will result in very poor performance; therefore,
-        this option should only be changed if the build aborts and
-        informs you that the platform lacks spinlock support. If setting this
-        option to false is required to build <productname>PostgreSQL</productname> on
-        your platform, please report the problem to the
-        <productname>PostgreSQL</productname> developers.
-       </para>
-      </listitem>
-     </varlistentry>
-
      <varlistentry id="configure-atomics-meson">
       <term><option>-Datomics={ true | false }</option></term>
       <listitem>
@@ -2719,6 +2686,7 @@ ninja install
        </para>
       </listitem>
      </varlistentry>
+
     </variablelist>
    </sect3>
 
@@ -3393,9 +3361,6 @@ export MANPATH
    these CPU architectures: x86, PowerPC, S/390, SPARC, ARM, MIPS,
    and RISC-V, including
    big-endian, little-endian, 32-bit, and 64-bit variants where applicable.
-   It is often
-   possible to build on an unsupported CPU type by configuring with
-   <option>--disable-spinlocks</option>, but performance will be poor.
   </para>
 
   <para>
diff --git a/meson.build b/meson.build
index 27805b9bcc..6a0d538365 100644
--- a/meson.build
+++ b/meson.build
@@ -2089,12 +2089,6 @@ endif
 # Atomics
 ###############################################################
 
-if not get_option('spinlocks')
-  warning('Not using spinlocks will cause poor performance')
-else
-  cdata.set('HAVE_SPINLOCKS', 1)
-endif
-
 if not get_option('atomics')
   warning('Not using atomics will cause poor performance')
 else
diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 93789b4e05..cd7ede9672 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -57,17 +57,7 @@ pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_flag vs slock_t");
 
-#ifndef HAVE_SPINLOCKS
-
-	/*
-	 * NB: If we're using semaphore based TAS emulation, be careful to use a
-	 * separate set of semaphores. Otherwise we'd get in trouble if an atomic
-	 * var would be manipulated while spinlock is held.
-	 */
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 
 	ptr->value = false;
 }
@@ -108,15 +98,7 @@ pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_uint32 vs slock_t");
 
-	/*
-	 * If we're using semaphore based atomic flags, be careful about nested
-	 * usage of atomics while a spinlock is held.
-	 */
-#ifndef HAVE_SPINLOCKS
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 	ptr->value = val_;
 }
 
@@ -184,15 +166,7 @@ pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
 	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
 					 "size mismatch of atomic_uint64 vs slock_t");
 
-	/*
-	 * If we're using semaphore based atomic flags, be careful about nested
-	 * usage of atomics while a spinlock is held.
-	 */
-#ifndef HAVE_SPINLOCKS
-	s_init_lock_sema((slock_t *) &ptr->sema, true);
-#else
 	SpinLockInit((slock_t *) &ptr->sema);
-#endif
 	ptr->value = val_;
 }
 
diff --git a/src/backend/port/posix_sema.c b/src/backend/port/posix_sema.c
index 5886d2233f..64186ec0a7 100644
--- a/src/backend/port/posix_sema.c
+++ b/src/backend/port/posix_sema.c
@@ -217,8 +217,7 @@ PGReserveSemaphores(int maxSemas)
 
 	/*
 	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
-	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
-	 * are emulating spinlocks with semaphores.)
+	 * ShmemAlloc() won't be ready yet.
 	 */
 	sharedSemas = (PGSemaphore)
 		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c
index 1454f96b5f..5b88a92bc9 100644
--- a/src/backend/port/sysv_sema.c
+++ b/src/backend/port/sysv_sema.c
@@ -325,8 +325,7 @@ PGReserveSemaphores(int maxSemas)
 
 	/*
 	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
-	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
-	 * are emulating spinlocks with semaphores.)
+	 * ShmemAlloc() won't be ready yet.
 	 */
 	sharedSemas = (PGSemaphore)
 		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 8d4589846a..69ffbd169f 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -107,9 +107,7 @@ typedef struct
 #ifdef USE_INJECTION_POINTS
 	struct InjectionPointsCtl *ActiveInjectionPoints;
 #endif
-#ifndef HAVE_SPINLOCKS
 	PGSemaphore *SpinlockSemaArray;
-#endif
 	int			NamedLWLockTrancheRequests;
 	NamedLWLockTranche *NamedLWLockTrancheArray;
 	LWLockPadded *MainLWLockArray;
@@ -716,9 +714,6 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
 	param->ActiveInjectionPoints = ActiveInjectionPoints;
 #endif
 
-#ifndef HAVE_SPINLOCKS
-	param->SpinlockSemaArray = SpinlockSemaArray;
-#endif
 	param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
 	param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
 	param->MainLWLockArray = MainLWLockArray;
@@ -978,9 +973,6 @@ restore_backend_variables(BackendParameters *param)
 	ActiveInjectionPoints = param->ActiveInjectionPoints;
 #endif
 
-#ifndef HAVE_SPINLOCKS
-	SpinlockSemaArray = param->SpinlockSemaArray;
-#endif
 	NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
 	NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
 	MainLWLockArray = param->MainLWLockArray;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index b6c3b16950..34e4d17b67 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -94,7 +94,6 @@ CalculateShmemSize(int *num_semaphores)
 
 	/* Compute number of semaphores we'll need */
 	numSemas = ProcGlobalSemas();
-	numSemas += SpinlockSemas();
 
 	/* Return the number of semaphores if requested by the caller */
 	if (num_semaphores)
@@ -111,7 +110,6 @@ CalculateShmemSize(int *num_semaphores)
 	 */
 	size = 100000;
 	size = add_size(size, PGSemaphoreShmemSize(numSemas));
-	size = add_size(size, SpinlockSemaSize());
 	size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
 											 sizeof(ShmemIndexEnt)));
 	size = add_size(size, dsm_estimate_size());
@@ -225,14 +223,6 @@ CreateSharedMemoryAndSemaphores(void)
 	 */
 	PGReserveSemaphores(numSemas);
 
-	/*
-	 * If spinlocks are disabled, initialize emulation layer (which depends on
-	 * semaphores, so the order is important here).
-	 */
-#ifndef HAVE_SPINLOCKS
-	SpinlockSemaInit();
-#endif
-
 	/*
 	 * Set up shared memory allocation mechanism
 	 */
diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile
index 3f89548bde..6cbaf23b85 100644
--- a/src/backend/storage/lmgr/Makefile
+++ b/src/backend/storage/lmgr/Makefile
@@ -21,7 +21,6 @@ OBJS = \
 	predicate.o \
 	proc.o \
 	s_lock.o \
-	spin.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/storage/lmgr/meson.build b/src/backend/storage/lmgr/meson.build
index 05ac41e809..d43511925e 100644
--- a/src/backend/storage/lmgr/meson.build
+++ b/src/backend/storage/lmgr/meson.build
@@ -9,5 +9,4 @@ backend_sources += files(
   'predicate.c',
   'proc.c',
   's_lock.c',
-  'spin.c',
 )
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index cba48b3e77..69549a65db 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * s_lock.c
- *	   Hardware-dependent implementation of spinlocks.
+ *	   Implementation of spinlocks.
  *
  * When waiting for a contended spinlock we loop tightly for awhile, then
  * delay using pg_usleep() and try again.  Preferably, "awhile" should be a
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
deleted file mode 100644
index 50cb99cd3b..0000000000
--- a/src/backend/storage/lmgr/spin.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * spin.c
- *	   Hardware-independent implementation of spinlocks.
- *
- *
- * For machines that have test-and-set (TAS) instructions, s_lock.h/.c
- * define the spinlock implementation.  This file contains only a stub
- * implementation for spinlocks using PGSemaphores.  Unless semaphores
- * are implemented in a way that doesn't involve a kernel call, this
- * is too slow to be very useful :-(
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  src/backend/storage/lmgr/spin.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "storage/pg_sema.h"
-#include "storage/shmem.h"
-#include "storage/spin.h"
-
-
-#ifndef HAVE_SPINLOCKS
-
-/*
- * No TAS, so spinlocks are implemented as PGSemaphores.
- */
-
-#ifndef HAVE_ATOMICS
-#define NUM_EMULATION_SEMAPHORES (NUM_SPINLOCK_SEMAPHORES + NUM_ATOMICS_SEMAPHORES)
-#else
-#define NUM_EMULATION_SEMAPHORES (NUM_SPINLOCK_SEMAPHORES)
-#endif							/* HAVE_ATOMICS */
-
-PGSemaphore *SpinlockSemaArray;
-
-#else							/* !HAVE_SPINLOCKS */
-
-#define NUM_EMULATION_SEMAPHORES 0
-
-#endif							/* HAVE_SPINLOCKS */
-
-/*
- * Report the amount of shared memory needed to store semaphores for spinlock
- * support.
- */
-Size
-SpinlockSemaSize(void)
-{
-	return NUM_EMULATION_SEMAPHORES * sizeof(PGSemaphore);
-}
-
-/*
- * Report number of semaphores needed to support spinlocks.
- */
-int
-SpinlockSemas(void)
-{
-	return NUM_EMULATION_SEMAPHORES;
-}
-
-#ifndef HAVE_SPINLOCKS
-
-/*
- * Initialize spinlock emulation.
- *
- * This must be called after PGReserveSemaphores().
- */
-void
-SpinlockSemaInit(void)
-{
-	PGSemaphore *spinsemas;
-	int			nsemas = SpinlockSemas();
-	int			i;
-
-	/*
-	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
-	 * ShmemAlloc() obviously can't be ready yet.
-	 */
-	spinsemas = (PGSemaphore *) ShmemAllocUnlocked(SpinlockSemaSize());
-	for (i = 0; i < nsemas; ++i)
-		spinsemas[i] = PGSemaphoreCreate();
-	SpinlockSemaArray = spinsemas;
-}
-
-/*
- * s_lock.h hardware-spinlock emulation using semaphores
- *
- * We map all spinlocks onto NUM_EMULATION_SEMAPHORES semaphores.  It's okay to
- * map multiple spinlocks onto one semaphore because no process should ever
- * hold more than one at a time.  We just need enough semaphores so that we
- * aren't adding too much extra contention from that.
- *
- * There is one exception to the restriction of only holding one spinlock at a
- * time, which is that it's ok if emulated atomic operations are nested inside
- * spinlocks. To avoid the danger of spinlocks and atomic using the same sema,
- * we make sure "normal" spinlocks and atomics backed by spinlocks use
- * distinct semaphores (see the nested argument to s_init_lock_sema).
- *
- * slock_t is just an int for this implementation; it holds the spinlock
- * number from 1..NUM_EMULATION_SEMAPHORES.  We intentionally ensure that 0
- * is not a valid value, so that testing with this code can help find
- * failures to initialize spinlocks.
- */
-
-static inline void
-s_check_valid(int lockndx)
-{
-	if (unlikely(lockndx <= 0 || lockndx > NUM_EMULATION_SEMAPHORES))
-		elog(ERROR, "invalid spinlock number: %d", lockndx);
-}
-
-void
-s_init_lock_sema(volatile slock_t *lock, bool nested)
-{
-	static uint32 counter = 0;
-	uint32		offset;
-	uint32		sema_total;
-	uint32		idx;
-
-	if (nested)
-	{
-		/*
-		 * To allow nesting atomics inside spinlocked sections, use a
-		 * different spinlock. See comment above.
-		 */
-		offset = 1 + NUM_SPINLOCK_SEMAPHORES;
-		sema_total = NUM_ATOMICS_SEMAPHORES;
-	}
-	else
-	{
-		offset = 1;
-		sema_total = NUM_SPINLOCK_SEMAPHORES;
-	}
-
-	idx = (counter++ % sema_total) + offset;
-
-	/* double check we did things correctly */
-	s_check_valid(idx);
-
-	*lock = idx;
-}
-
-void
-s_unlock_sema(volatile slock_t *lock)
-{
-	int			lockndx = *lock;
-
-	s_check_valid(lockndx);
-
-	PGSemaphoreUnlock(SpinlockSemaArray[lockndx - 1]);
-}
-
-bool
-s_lock_free_sema(volatile slock_t *lock)
-{
-	/* We don't currently use S_LOCK_FREE anyway */
-	elog(ERROR, "spin.c does not support S_LOCK_FREE()");
-	return false;
-}
-
-int
-tas_sema(volatile slock_t *lock)
-{
-	int			lockndx = *lock;
-
-	s_check_valid(lockndx);
-
-	/* Note that TAS macros return 0 if *success* */
-	return !PGSemaphoreTryLock(SpinlockSemaArray[lockndx - 1]);
-}
-
-#endif							/* !HAVE_SPINLOCKS */
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 3dea3856aa..e6c06f6102 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -382,9 +382,6 @@
 /* Define to 1 if the system has the type `socklen_t'. */
 #undef HAVE_SOCKLEN_T
 
-/* Define to 1 if you have spinlocks. */
-#undef HAVE_SPINLOCKS
-
 /* Define to 1 if you have the `SSL_CTX_set_cert_cb' function. */
 #undef HAVE_SSL_CTX_SET_CERT_CB
 
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index f941ee2faf..11f74f4b56 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -86,21 +86,6 @@
 #define USE_FLOAT8_BYVAL 1
 #endif
 
-/*
- * When we don't have native spinlocks, we use semaphores to simulate them.
- * Decreasing this value reduces consumption of OS resources; increasing it
- * may improve performance, but supplying a real spinlock implementation is
- * probably far better.
- */
-#define NUM_SPINLOCK_SEMAPHORES		128
-
-/*
- * When we have neither spinlocks nor atomic operations support we're
- * implementing atomic operations on top of spinlock on top of semaphores. To
- * be safe against atomic operations while holding a spinlock separate
- * semaphores have to be used.
- */
-#define NUM_ATOMICS_SEMAPHORES		64
 
 /*
  * MAXPGPATH: standard size of a pathname buffer in PostgreSQL (hence,
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index f6fa432d2d..03134e3b7b 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -16,8 +16,8 @@
  *
  * There exist generic, hardware independent, implementations for several
  * compilers which might be sufficient, although possibly not optimal, for a
- * new platform. If no such generic implementation is available spinlocks (or
- * even OS provided semaphores) will be used to implement the API.
+ * new platform. If no such generic implementation is available spinlocks will
+ * be used to implement the API.
  *
  * Implement _u64 atomics if and only if your platform can use them
  * efficiently (and obviously correctly).
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 34cfee110f..2e3eef4aca 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -20,9 +20,7 @@
 #ifndef pg_memory_barrier_impl
 /*
  * If we have no memory barrier implementation for this architecture, we
- * fall back to acquiring and releasing a spinlock.  This might, in turn,
- * fall back to the semaphore-based spinlock implementation, which will be
- * amazingly slow.
+ * fall back to acquiring and releasing a spinlock.
  *
  * It's not self-evident that every possible legal implementation of a
  * spinlock acquire-and-release would be equivalent to a full memory barrier.
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 02c68513a5..e94ed5f48b 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
  * s_lock.h
- *	   Hardware-dependent implementation of spinlocks.
+ *	   Implementation of spinlocks.
  *
  *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the hardware-independent macros in spin.h.
+ *	Call them through the macros in spin.h.
  *
  *	The following hardware-dependent macros must be provided for each
  *	supported platform:
@@ -78,13 +78,6 @@
  *	in assembly language to execute a hardware atomic-test-and-set
  *	instruction.  Equivalent OS-supplied mutex routines could be used too.
  *
- *	If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
- *	defined), then we fall back on an emulation that uses SysV semaphores
- *	(see spin.c).  This emulation will be MUCH MUCH slower than a proper TAS()
- *	implementation, because of the cost of a kernel call per lock or unlock.
- *	An old report is that Postgres spends around 40% of its time in semop(2)
- *	when using the SysV semaphore code.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -100,8 +93,6 @@
 #error "s_lock.h may not be included from frontend code"
 #endif
 
-#ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 /*************************************************************************
  * All the gcc inlines
@@ -655,34 +646,10 @@ spin_delay(void)
 
 /* Blow up if we didn't have any way to do spinlocks */
 #ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@lists.postgresql.org.
+#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-bugs@lists.postgresql.org.
 #endif
 
 
-#else	/* !HAVE_SPINLOCKS */
-
-
-/*
- * Fake spinlock implementation using semaphores --- slow and prone
- * to fall foul of kernel limits on number of semaphores, so don't use this
- * unless you must!  The subroutines appear in spin.c.
- */
-typedef int slock_t;
-
-extern bool s_lock_free_sema(volatile slock_t *lock);
-extern void s_unlock_sema(volatile slock_t *lock);
-extern void s_init_lock_sema(volatile slock_t *lock, bool nested);
-extern int	tas_sema(volatile slock_t *lock);
-
-#define S_LOCK_FREE(lock)	s_lock_free_sema(lock)
-#define S_UNLOCK(lock)	 s_unlock_sema(lock)
-#define S_INIT_LOCK(lock)	s_init_lock_sema(lock, false)
-#define TAS(lock)	tas_sema(lock)
-
-
-#endif	/* HAVE_SPINLOCKS */
-
-
 /*
  * Default Definitions - override these above as needed.
  */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index c0679c5999..3ae2a56d07 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -1,11 +1,11 @@
 /*-------------------------------------------------------------------------
  *
  * spin.h
- *	   Hardware-independent implementation of spinlocks.
+ *	   API for spinlocks.
  *
  *
- *	The hardware-independent interface to spinlocks is defined by the
- *	typedef "slock_t" and these macros:
+ *	The interface to spinlocks is defined by the typedef "slock_t" and
+ *	these macros:
  *
  *	void SpinLockInit(volatile slock_t *lock)
  *		Initialize a spinlock (to the unlocked state).
@@ -52,9 +52,6 @@
 #define SPIN_H
 
 #include "storage/s_lock.h"
-#ifndef HAVE_SPINLOCKS
-#include "storage/pg_sema.h"
-#endif
 
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
@@ -65,13 +62,4 @@
 
 #define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
-
-extern int	SpinlockSemas(void);
-extern Size SpinlockSemaSize(void);
-
-#ifndef HAVE_SPINLOCKS
-extern void SpinlockSemaInit(void);
-extern PGDLLIMPORT PGSemaphore *SpinlockSemaArray;
-#endif
-
 #endif							/* SPIN_H */
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 45a6ad3c49..14aad5a0c6 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -887,91 +887,7 @@ test_spinlock(void)
 		if (memcmp(struct_w_lock.data_after, "ef12", 4) != 0)
 			elog(ERROR, "padding after spinlock modified");
 	}
-
-	/*
-	 * Ensure that allocating more than INT32_MAX emulated spinlocks works.
-	 * That's interesting because the spinlock emulation uses a 32bit integer
-	 * to map spinlocks onto semaphores. There've been bugs...
-	 */
-#ifndef HAVE_SPINLOCKS
-	{
-		/*
-		 * Initialize enough spinlocks to advance counter close to wraparound.
-		 * It's too expensive to perform acquire/release for each, as those
-		 * may be syscalls when the spinlock emulation is used (and even just
-		 * atomic TAS would be expensive).
-		 */
-		for (uint32 i = 0; i < INT32_MAX - 100000; i++)
-		{
-			slock_t		lock;
-
-			SpinLockInit(&lock);
-		}
-
-		for (uint32 i = 0; i < 200000; i++)
-		{
-			slock_t		lock;
-
-			SpinLockInit(&lock);
-
-			SpinLockAcquire(&lock);
-			SpinLockRelease(&lock);
-			SpinLockAcquire(&lock);
-			SpinLockRelease(&lock);
-		}
-	}
-#endif
-}
-
-/*
- * Verify that performing atomic ops inside a spinlock isn't a
- * problem. Realistically that's only going to be a problem when both
- * --disable-spinlocks and --disable-atomics are used, but it's cheap enough
- * to just always test.
- *
- * The test works by initializing enough atomics that we'd conflict if there
- * were an overlap between a spinlock and an atomic by holding a spinlock
- * while manipulating more than NUM_SPINLOCK_SEMAPHORES atomics.
- *
- * NUM_TEST_ATOMICS doesn't really need to be more than
- * NUM_SPINLOCK_SEMAPHORES, but it seems better to test a bit more
- * extensively.
- */
-static void
-test_atomic_spin_nest(void)
-{
-	slock_t		lock;
-#define NUM_TEST_ATOMICS (NUM_SPINLOCK_SEMAPHORES + NUM_ATOMICS_SEMAPHORES + 27)
-	pg_atomic_uint32 atomics32[NUM_TEST_ATOMICS];
-	pg_atomic_uint64 atomics64[NUM_TEST_ATOMICS];
-
-	SpinLockInit(&lock);
-
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		pg_atomic_init_u32(&atomics32[i], 0);
-		pg_atomic_init_u64(&atomics64[i], 0);
-	}
-
-	/* just so it's not all zeroes */
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		EXPECT_EQ_U32(pg_atomic_fetch_add_u32(&atomics32[i], i), 0);
-		EXPECT_EQ_U64(pg_atomic_fetch_add_u64(&atomics64[i], i), 0);
-	}
-
-	/* test whether we can do atomic op with lock held */
-	SpinLockAcquire(&lock);
-	for (int i = 0; i < NUM_TEST_ATOMICS; i++)
-	{
-		EXPECT_EQ_U32(pg_atomic_fetch_sub_u32(&atomics32[i], i), i);
-		EXPECT_EQ_U32(pg_atomic_read_u32(&atomics32[i]), 0);
-		EXPECT_EQ_U64(pg_atomic_fetch_sub_u64(&atomics64[i], i), i);
-		EXPECT_EQ_U64(pg_atomic_read_u64(&atomics64[i]), 0);
-	}
-	SpinLockRelease(&lock);
 }
-#undef NUM_TEST_ATOMICS
 
 PG_FUNCTION_INFO_V1(test_atomic_ops);
 Datum
@@ -989,8 +905,6 @@ test_atomic_ops(PG_FUNCTION_ARGS)
 	 */
 	test_spinlock();
 
-	test_atomic_spin_nest();
-
 	PG_RETURN_BOOL(true);
 }
 
-- 
2.39.2

v2-0002-Remove-disable-atomics-require-32-bit-atomics.patchtext/x-patch; charset=US-ASCII; name=v2-0002-Remove-disable-atomics-require-32-bit-atomics.patchDownload
From ca0d057fdf8b47957ce469c1300835564a82da92 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 2 Jul 2024 15:13:00 +1200
Subject: [PATCH v2 2/4] Remove --disable-atomics, require 32 bit atomics.

All targeted systems have real atomics support in practice.  Since
edadeb07 there is no remaining reason to carry code that simulates
atomic flag and uint32 imperfectly with spinlocks.

Any modern toolchain capable of implementing C11 <stdatomic.h> must have
the underlying operations we need, though we don't require C11 yet.

Discussion: https://postgr.es/m/3351991.1697728588%40sss.pgh.pa.us
---
 configure                                 |  40 --------
 configure.ac                              |  13 ---
 doc/src/sgml/installation.sgml            |  25 -----
 meson.build                               |  65 ++++++-------
 src/backend/port/atomics.c                | 109 ----------------------
 src/include/pg_config.h.in                |   3 -
 src/include/port/atomics.h                |  18 ++--
 src/include/port/atomics/arch-x86.h       |   8 --
 src/include/port/atomics/fallback.h       |  87 +----------------
 src/include/port/atomics/generic-gcc.h    |   4 -
 src/include/port/atomics/generic-msvc.h   |   4 -
 src/include/port/atomics/generic-sunpro.h |   8 --
 12 files changed, 39 insertions(+), 345 deletions(-)

diff --git a/configure b/configure
index f8deaa8d78..8f684f7945 100755
--- a/configure
+++ b/configure
@@ -836,7 +836,6 @@ enable_integer_datetimes
 enable_nls
 with_pgport
 enable_rpath
-enable_atomics
 enable_debug
 enable_profiling
 enable_coverage
@@ -1528,7 +1527,6 @@ Optional Features:
                           enable Native Language Support
   --disable-rpath         do not embed shared library search path in
                           executables
-  --disable-atomics       do not use atomic operations
   --enable-debug          build with debugging symbols (-g)
   --enable-profiling      build with profiling enabled
   --enable-coverage       build with coverage testing instrumentation
@@ -3264,33 +3262,6 @@ fi
 
 
 
-#
-# Atomic operations
-#
-
-
-# Check whether --enable-atomics was given.
-if test "${enable_atomics+set}" = set; then :
-  enableval=$enable_atomics;
-  case $enableval in
-    yes)
-      :
-      ;;
-    no)
-      :
-      ;;
-    *)
-      as_fn_error $? "no argument expected for --enable-atomics option" "$LINENO" 5
-      ;;
-  esac
-
-else
-  enable_atomics=yes
-
-fi
-
-
-
 #
 # --enable-debug adds -g to compiler flags
 #
@@ -12156,17 +12127,6 @@ fi
 
 fi
 
-if test "$enable_atomics" = yes; then
-
-$as_echo "#define HAVE_ATOMICS 1" >>confdefs.h
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING:
-*** Not using atomic operations will cause poor performance." >&5
-$as_echo "$as_me: WARNING:
-*** Not using atomic operations will cause poor performance." >&2;}
-fi
-
 if test "$with_gssapi" = yes ; then
   if test "$PORTNAME" != "win32"; then
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing gss_store_cred_into" >&5
diff --git a/configure.ac b/configure.ac
index a72169f574..75b73532fe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -186,12 +186,6 @@ PGAC_ARG_BOOL(enable, rpath, yes,
               [do not embed shared library search path in executables])
 AC_SUBST(enable_rpath)
 
-#
-# Atomic operations
-#
-PGAC_ARG_BOOL(enable, atomics, yes,
-              [do not use atomic operations])
-
 #
 # --enable-debug adds -g to compiler flags
 #
@@ -1290,13 +1284,6 @@ failure.  It is possible the compiler isn't looking in the proper directory.
 Use --without-zlib to disable zlib support.])])
 fi
 
-if test "$enable_atomics" = yes; then
-  AC_DEFINE(HAVE_ATOMICS, 1, [Define to 1 if you want to use atomics if available.])
-else
-  AC_MSG_WARN([
-*** Not using atomic operations will cause poor performance.])
-fi
-
 if test "$with_gssapi" = yes ; then
   if test "$PORTNAME" != "win32"; then
     AC_SEARCH_LIBS(gss_store_cred_into, [gssapi_krb5 gss 'gssapi -lkrb5 -lcrypto'], [],
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 3f19f272b1..4ab8ddba7c 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1258,18 +1258,6 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
-      <varlistentry id="configure-option-disable-atomics">
-       <term><option>--disable-atomics</option></term>
-       <listitem>
-        <para>
-         Disable use of CPU atomic operations.  This option does nothing on
-         platforms that lack such operations.  On platforms that do have
-         them, this will result in poor performance.  This option is only
-         useful for debugging or making performance comparisons.
-        </para>
-       </listitem>
-      </varlistentry>
-
      </variablelist>
 
    </sect3>
@@ -2674,19 +2662,6 @@ ninja install
       </listitem>
      </varlistentry>
 
-     <varlistentry id="configure-atomics-meson">
-      <term><option>-Datomics={ true | false }</option></term>
-      <listitem>
-       <para>
-        This option is set to true by default; setting it to false will
-        disable use of CPU atomic operations.  The option does nothing on
-        platforms that lack such operations.  On platforms that do have
-        them, disabling atomics will result in poor performance.  Changing
-        this option is only useful for debugging or making performance comparisons.
-       </para>
-      </listitem>
-     </varlistentry>
-
     </variablelist>
    </sect3>
 
diff --git a/meson.build b/meson.build
index 6a0d538365..7de0371226 100644
--- a/meson.build
+++ b/meson.build
@@ -2089,70 +2089,61 @@ endif
 # Atomics
 ###############################################################
 
-if not get_option('atomics')
-  warning('Not using atomics will cause poor performance')
-else
-  # XXX: perhaps we should require some atomics support in this case these
-  # days?
-  cdata.set('HAVE_ATOMICS', 1)
-
-  atomic_checks = [
-    {'name': 'HAVE_GCC__SYNC_CHAR_TAS',
-     'desc': '__sync_lock_test_and_set(char)',
-     'test': '''
+atomic_checks = [
+  {'name': 'HAVE_GCC__SYNC_CHAR_TAS',
+   'desc': '__sync_lock_test_and_set(char)',
+   'test': '''
 char lock = 0;
 __sync_lock_test_and_set(&lock, 1);
 __sync_lock_release(&lock);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT32_TAS',
-     'desc': '__sync_lock_test_and_set(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT32_TAS',
+   'desc': '__sync_lock_test_and_set(int32)',
+   'test': '''
 int lock = 0;
 __sync_lock_test_and_set(&lock, 1);
 __sync_lock_release(&lock);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT32_CAS',
-     'desc': '__sync_val_compare_and_swap(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT32_CAS',
+   'desc': '__sync_val_compare_and_swap(int32)',
+   'test': '''
 int val = 0;
 __sync_val_compare_and_swap(&val, 0, 37);'''},
 
-    {'name': 'HAVE_GCC__SYNC_INT64_CAS',
-     'desc': '__sync_val_compare_and_swap(int64)',
-     'test': '''
+  {'name': 'HAVE_GCC__SYNC_INT64_CAS',
+   'desc': '__sync_val_compare_and_swap(int64)',
+   'test': '''
 INT64 val = 0;
 __sync_val_compare_and_swap(&val, 0, 37);'''},
 
-    {'name': 'HAVE_GCC__ATOMIC_INT32_CAS',
-     'desc': ' __atomic_compare_exchange_n(int32)',
-     'test': '''
+  {'name': 'HAVE_GCC__ATOMIC_INT32_CAS',
+   'desc': ' __atomic_compare_exchange_n(int32)',
+   'test': '''
 int val = 0;
 int expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
 
-    {'name': 'HAVE_GCC__ATOMIC_INT64_CAS',
-     'desc': ' __atomic_compare_exchange_n(int64)',
-     'test': '''
+  {'name': 'HAVE_GCC__ATOMIC_INT64_CAS',
+   'desc': ' __atomic_compare_exchange_n(int64)',
+   'test': '''
 INT64 val = 0;
 INT64 expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
-  ]
+]
 
-  foreach check : atomic_checks
-    test = '''
+foreach check : atomic_checks
+  test = '''
 int main(void)
 {
 @0@
 }'''.format(check['test'])
 
-    cdata.set(check['name'],
-      cc.links(test,
-        name: check['desc'],
-        args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))]) ? 1 : false
-    )
-  endforeach
-
-endif
+  cdata.set(check['name'],
+    cc.links(test,
+      name: check['desc'],
+      args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))]) ? 1 : false
+  )
+endforeach
 
 
 ###############################################################
diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index cd7ede9672..6f1e014d0b 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -49,115 +49,6 @@ pg_extern_compiler_barrier(void)
 #endif
 
 
-#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
-
-void
-pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
-					 "size mismatch of atomic_flag vs slock_t");
-
-	SpinLockInit((slock_t *) &ptr->sema);
-
-	ptr->value = false;
-}
-
-bool
-pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	uint32		oldval;
-
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	oldval = ptr->value;
-	ptr->value = true;
-	SpinLockRelease((slock_t *) &ptr->sema);
-
-	return oldval == 0;
-}
-
-void
-pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	ptr->value = false;
-	SpinLockRelease((slock_t *) &ptr->sema);
-}
-
-bool
-pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
-{
-	return ptr->value == 0;
-}
-
-#endif							/* PG_HAVE_ATOMIC_FLAG_SIMULATION */
-
-#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
-void
-pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
-{
-	StaticAssertDecl(sizeof(ptr->sema) >= sizeof(slock_t),
-					 "size mismatch of atomic_uint32 vs slock_t");
-
-	SpinLockInit((slock_t *) &ptr->sema);
-	ptr->value = val_;
-}
-
-void
-pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
-{
-	/*
-	 * One might think that an unlocked write doesn't need to acquire the
-	 * spinlock, but one would be wrong. Even an unlocked write has to cause a
-	 * concurrent pg_atomic_compare_exchange_u32() (et al) to fail.
-	 */
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	ptr->value = val;
-	SpinLockRelease((slock_t *) &ptr->sema);
-}
-
-bool
-pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-									uint32 *expected, uint32 newval)
-{
-	bool		ret;
-
-	/*
-	 * Do atomic op under a spinlock. It might look like we could just skip
-	 * the cmpxchg if the lock isn't available, but that'd just emulate a
-	 * 'weak' compare and swap. I.e. one that allows spurious failures. Since
-	 * several algorithms rely on a strong variant and that is efficiently
-	 * implementable on most major architectures let's emulate it here as
-	 * well.
-	 */
-	SpinLockAcquire((slock_t *) &ptr->sema);
-
-	/* perform compare/exchange logic */
-	ret = ptr->value == *expected;
-	*expected = ptr->value;
-	if (ret)
-		ptr->value = newval;
-
-	/* and release lock */
-	SpinLockRelease((slock_t *) &ptr->sema);
-
-	return ret;
-}
-
-uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-	uint32		oldval;
-
-	SpinLockAcquire((slock_t *) &ptr->sema);
-	oldval = ptr->value;
-	ptr->value += add_;
-	SpinLockRelease((slock_t *) &ptr->sema);
-	return oldval;
-}
-
-#endif							/* PG_HAVE_ATOMIC_U32_SIMULATION */
-
-
 #ifdef PG_HAVE_ATOMIC_U64_SIMULATION
 
 void
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index e6c06f6102..0e9b108e66 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -57,9 +57,6 @@
 /* Define to 1 if you have the `ASN1_STRING_get0_data' function. */
 #undef HAVE_ASN1_STRING_GET0_DATA
 
-/* Define to 1 if you want to use atomics if available. */
-#undef HAVE_ATOMICS
-
 /* Define to 1 if you have the <atomic.h> header file. */
 #undef HAVE_ATOMIC_H
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index 03134e3b7b..c2ce10a718 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -17,7 +17,7 @@
  * There exist generic, hardware independent, implementations for several
  * compilers which might be sufficient, although possibly not optimal, for a
  * new platform. If no such generic implementation is available spinlocks will
- * be used to implement the API.
+ * be used to implement the 64-bit parts of the API.
  *
  * Implement _u64 atomics if and only if your platform can use them
  * efficiently (and obviously correctly).
@@ -91,17 +91,17 @@
 #elif defined(__SUNPRO_C) && !defined(__GNUC__)
 #include "port/atomics/generic-sunpro.h"
 #else
-/*
- * Unsupported compiler, we'll likely use slower fallbacks... At least
- * compiler barriers should really be provided.
- */
+/* Unknown compiler. */
+#endif
+
+/* Fail if we couldn't find implementations of required facilities. */
+#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
+#error "could not find an implementation of pg_atomic_uint32"
 #endif
 
 /*
- * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and
- * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics
- * support. In the case of spinlock backed atomics the emulation is expected
- * to be efficient, although less so than native atomics support.
+ * Provide a spinlock-based implementation of the 64 bit variants, if
+ * necessary.
  */
 #include "port/atomics/fallback.h"
 
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 2a8eca30fc..c12f8a6069 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -49,8 +49,6 @@
  * nice to support older gcc's and the compare/exchange implementation here is
  * actually more efficient than the * __sync variant.
  */
-#if defined(HAVE_ATOMICS)
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_FLAG_SUPPORT
@@ -80,8 +78,6 @@ typedef struct pg_atomic_uint64
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
-#endif /* defined(HAVE_ATOMICS) */
-
 #if !defined(PG_HAVE_SPIN_DELAY)
 /*
  * This sequence is equivalent to the PAUSE instruction ("rep" is
@@ -132,8 +128,6 @@ pg_spin_delay_impl(void)
 #endif /* !defined(PG_HAVE_SPIN_DELAY) */
 
 
-#if defined(HAVE_ATOMICS)
-
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
@@ -250,5 +244,3 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
 #endif /* 8 byte single-copy atomicity */
-
-#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 2e3eef4aca..8ffd1a8fd3 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * fallback.h
- *    Fallback for platforms without spinlock and/or atomics support. Slower
+ *    Fallback for platforms without 64 bit atomics support. Slower
  *    than native atomics support, but not unusably slow.
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
@@ -49,50 +49,6 @@ extern void pg_extern_compiler_barrier(void);
 #endif
 
 
-/*
- * If we have atomics implementation for this platform, fall back to providing
- * the atomics API using a spinlock to protect the internal state. Possibly
- * the spinlock implementation uses semaphores internally...
- *
- * We have to be a bit careful here, as it's not guaranteed that atomic
- * variables are mapped to the same address in every process (e.g. dynamic
- * shared memory segments). We can't just hash the address and use that to map
- * to a spinlock. Instead assign a spinlock on initialization of the atomic
- * variable.
- */
-#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
-
-#define PG_HAVE_ATOMIC_FLAG_SIMULATION
-#define PG_HAVE_ATOMIC_FLAG_SUPPORT
-
-typedef struct pg_atomic_flag
-{
-	/*
-	 * To avoid circular includes we can't use s_lock as a type here. Instead
-	 * just reserve enough space for all spinlock types. Some platforms would
-	 * be content with just one byte instead of 4, but that's not too much
-	 * waste.
-	 */
-	int			sema;
-	volatile bool value;
-} pg_atomic_flag;
-
-#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */
-
-#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
-
-#define PG_HAVE_ATOMIC_U32_SIMULATION
-
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-	/* Check pg_atomic_flag's definition above for an explanation */
-	int			sema;
-	volatile uint32 value;
-} pg_atomic_uint32;
-
-#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */
-
 #if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
 
 #define PG_HAVE_ATOMIC_U64_SIMULATION
@@ -100,49 +56,10 @@ typedef struct pg_atomic_uint32
 #define PG_HAVE_ATOMIC_U64_SUPPORT
 typedef struct pg_atomic_uint64
 {
-	/* Check pg_atomic_flag's definition above for an explanation */
 	int			sema;
 	volatile uint64 value;
 } pg_atomic_uint64;
 
-#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
-
-#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
-
-#define PG_HAVE_ATOMIC_INIT_FLAG
-extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_TEST_SET_FLAG
-extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_CLEAR_FLAG
-extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr);
-
-#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
-extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr);
-
-#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */
-
-#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
-
-#define PG_HAVE_ATOMIC_INIT_U32
-extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_);
-
-#define PG_HAVE_ATOMIC_WRITE_U32
-extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val);
-
-#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
-												uint32 *expected, uint32 newval);
-
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_);
-
-#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
-
-
-#ifdef PG_HAVE_ATOMIC_U64_SIMULATION
-
 #define PG_HAVE_ATOMIC_INIT_U64
 extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_);
 
@@ -153,4 +70,4 @@ extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
 extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_);
 
-#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index 872d2f02af..cfbcbe0fff 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -53,8 +53,6 @@
 #endif
 
 
-#ifdef HAVE_ATOMICS
-
 /* generic gcc based atomic flag implementation */
 #if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \
 	&& (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS))
@@ -319,5 +317,3 @@ pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
 #endif
 
 #endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */
-
-#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h
index c013aca5e7..677436f260 100644
--- a/src/include/port/atomics/generic-msvc.h
+++ b/src/include/port/atomics/generic-msvc.h
@@ -30,8 +30,6 @@
 #define pg_memory_barrier_impl()	MemoryBarrier()
 #endif
 
-#if defined(HAVE_ATOMICS)
-
 #define PG_HAVE_ATOMIC_U32_SUPPORT
 typedef struct pg_atomic_uint32
 {
@@ -115,5 +113,3 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 }
 
 #endif /* _WIN64 */
-
-#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h
index 840a45e778..08f093ed2c 100644
--- a/src/include/port/atomics/generic-sunpro.h
+++ b/src/include/port/atomics/generic-sunpro.h
@@ -17,8 +17,6 @@
  * -------------------------------------------------------------------------
  */
 
-#if defined(HAVE_ATOMICS)
-
 #ifdef HAVE_MBARRIER_H
 #include <mbarrier.h>
 
@@ -66,10 +64,6 @@ typedef struct pg_atomic_uint64
 
 #endif /* HAVE_ATOMIC_H */
 
-#endif /* defined(HAVE_ATOMICS) */
-
-
-#if defined(HAVE_ATOMICS)
 
 #ifdef HAVE_ATOMIC_H
 
@@ -117,5 +111,3 @@ pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
 }
 
 #endif /* HAVE_ATOMIC_H */
-
-#endif /* defined(HAVE_ATOMICS) */
-- 
2.39.2

v2-0003-Require-compiler-barrier-support.patchtext/x-patch; charset=US-ASCII; name=v2-0003-Require-compiler-barrier-support.patchDownload
From e0a1081846cb907a71adf13050605478c71a827c Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 30 Jul 2024 06:27:57 +1200
Subject: [PATCH v2 3/4] Require compiler barrier support.

Previously we had a fallback implementation of pg_compiler_barrier()
that called an empty function across a translation unit boundary so the
compiler couldn't see what it did.  That might not work with a link time
optimizer.  Since we now require knowledge of how to implement atomics,
there shouldn't be any cases where we don't also know how to implement
compiler barriers.

Discussion: https://postgr.es/m/721bf39a-ed8a-44b0-8b8e-be3bd81db748%40technowledgy.de
Discussion: https://postgr.es/m/3351991.1697728588%40sss.pgh.pa.us
---
 src/backend/port/atomics.c          |  8 --------
 src/include/port/atomics.h          |  3 +++
 src/include/port/atomics/fallback.h | 15 ---------------
 3 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 6f1e014d0b..19a84a7849 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -40,14 +40,6 @@ pg_spinlock_barrier(void)
 }
 #endif
 
-#ifdef PG_HAVE_COMPILER_BARRIER_EMULATION
-void
-pg_extern_compiler_barrier(void)
-{
-	/* do nothing */
-}
-#endif
-
 
 #ifdef PG_HAVE_ATOMIC_U64_SIMULATION
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index c2ce10a718..edb0ae40dc 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -98,6 +98,9 @@
 #if !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
 #error "could not find an implementation of pg_atomic_uint32"
 #endif
+#if !defined(pg_compiler_barrier_impl)
+#error "could not find an implementation of pg_compiler_barrier"
+#endif
 
 /*
  * Provide a spinlock-based implementation of the 64 bit variants, if
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 8ffd1a8fd3..9f83827d83 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -33,21 +33,6 @@ extern void pg_spinlock_barrier(void);
 #define pg_memory_barrier_impl pg_spinlock_barrier
 #endif
 
-#ifndef pg_compiler_barrier_impl
-/*
- * If the compiler/arch combination does not provide compiler barriers,
- * provide a fallback.  The fallback simply consists of a function call into
- * an externally defined function.  That should guarantee compiler barrier
- * semantics except for compilers that do inter translation unit/global
- * optimization - those better provide an actual compiler barrier.
- *
- * A native compiler barrier for sure is a lot faster than this...
- */
-#define PG_HAVE_COMPILER_BARRIER_EMULATION
-extern void pg_extern_compiler_barrier(void);
-#define pg_compiler_barrier_impl pg_extern_compiler_barrier
-#endif
-
 
 #if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
 
-- 
2.39.2

v2-0004-Require-memory-barrier-support.patchtext/x-patch; charset=US-ASCII; name=v2-0004-Require-memory-barrier-support.patchDownload
From 2ccc8fdaf2f1441d9ba2924451ffa53bf1a1f4a5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 30 Jul 2024 07:14:59 +1200
Subject: [PATCH v2 4/4] Require memory barrier support.

Previously we had a fallback implementation that made a harmless system
call, on the theory that system calls must contain a memory barrier.
(Some of the comments removed refer to a spinlock implementation, but
that changed in 1b468a13 which left some stray comments.)

We don't require 'read' and 'write' barriers, falling back to full
memory barriers still.  Notably, MSVC relies on that, which is probably
incorrect XXX?

Discussion: https://postgr.es/m/721bf39a-ed8a-44b0-8b8e-be3bd81db748%40technowledgy.de
Discussion: https://postgr.es/m/3351991.1697728588%40sss.pgh.pa.us
---
 src/backend/port/atomics.c          | 23 -----------------------
 src/include/port/atomics.h          |  4 ++++
 src/include/port/atomics/fallback.h | 16 ----------------
 src/include/port/atomics/generic.h  | 10 ----------
 4 files changed, 4 insertions(+), 49 deletions(-)

diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 19a84a7849..f98f6b6dbd 100644
--- a/src/backend/port/atomics.c
+++ b/src/backend/port/atomics.c
@@ -17,29 +17,6 @@
 #include "port/atomics.h"
 #include "storage/spin.h"
 
-#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION
-#ifdef WIN32
-#error "barriers are required (and provided) on WIN32 platforms"
-#endif
-#include <signal.h>
-#endif
-
-#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION
-void
-pg_spinlock_barrier(void)
-{
-	/*
-	 * NB: we have to be reentrant here, some barriers are placed in signal
-	 * handlers.
-	 *
-	 * We use kill(0) for the fallback barrier as we assume that kernels on
-	 * systems old enough to require fallback barrier support will include an
-	 * appropriate barrier while checking the existence of the postmaster pid.
-	 */
-	(void) kill(PostmasterPid, 0);
-}
-#endif
-
 
 #ifdef PG_HAVE_ATOMIC_U64_SIMULATION
 
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index edb0ae40dc..c0c8688f73 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -101,6 +101,10 @@
 #if !defined(pg_compiler_barrier_impl)
 #error "could not find an implementation of pg_compiler_barrier"
 #endif
+#if !defined(pg_memory_barrier_impl)
+#error "could not find an implementation of pg_memory_barrier_impl"
+#endif
+
 
 /*
  * Provide a spinlock-based implementation of the 64 bit variants, if
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
index 9f83827d83..2c0eb28768 100644
--- a/src/include/port/atomics/fallback.h
+++ b/src/include/port/atomics/fallback.h
@@ -17,22 +17,6 @@
 #	error "should be included via atomics.h"
 #endif
 
-#ifndef pg_memory_barrier_impl
-/*
- * If we have no memory barrier implementation for this architecture, we
- * fall back to acquiring and releasing a spinlock.
- *
- * It's not self-evident that every possible legal implementation of a
- * spinlock acquire-and-release would be equivalent to a full memory barrier.
- * For example, I'm not sure that Itanium's acq and rel add up to a full
- * fence.  But all of our actual implementations seem OK in this regard.
- */
-#define PG_HAVE_MEMORY_BARRIER_EMULATION
-
-extern void pg_spinlock_barrier(void);
-#define pg_memory_barrier_impl pg_spinlock_barrier
-#endif
-
 
 #if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
 
diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h
index 6113ab62a3..b636f95142 100644
--- a/src/include/port/atomics/generic.h
+++ b/src/include/port/atomics/generic.h
@@ -135,19 +135,9 @@ pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
 static inline void
 pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
 {
-	/*
-	 * Use a memory barrier + plain write if we have a native memory
-	 * barrier. But don't do so if memory barriers use spinlocks - that'd lead
-	 * to circularity if flags are used to implement spinlocks.
-	 */
-#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION
 	/* XXX: release semantics suffice? */
 	pg_memory_barrier_impl();
 	pg_atomic_write_u32_impl(ptr, 0);
-#else
-	uint32 value = 1;
-	pg_atomic_compare_exchange_u32_impl(ptr, &value, 0);
-#endif
 }
 
 #elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG)
-- 
2.39.2

#24Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Thomas Munro (#23)
Re: Remove last traces of HPPA support

On 30/07/2024 00:50, Thomas Munro wrote:

On Wed, Jul 3, 2024 at 8:09 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Thomas Munro <thomas.munro@gmail.com> writes:

Here are some experimental patches to try out some ideas mentioned
upthread, that are approximately unlocked by that cleanup.

FWIW, I'm good with getting rid of --disable-spinlocks and
--disable-atomics. That's a fair amount of code and needing to
support it causes problems, as you say. I am very much less
excited about ripping out our spinlock and/or atomics code in favor
of <stdatomic.h>; I just don't see the gain there, and I do see risk
in ceding control of the semantics and performance of those
primitives.

OK, <stdatomic.h> part on ice for now. Here's an update of the rest,
this time also removing the barrier fallbacks as discussed in the LTO
thread[1].

Looks good to me.

I guess we should also consider reimplementing the spinlock on the
atomic API, but I can see that Andres is poking at spinlock code right
now so I'll keep out of his way...

Side issue: I noticed via CI failure when I tried to require
read/write barriers to be provided (a choice I backed out of), that on
MSVC we seem to be using the full memory barrier fallback for those.
Huh? For x86, I think they should be using pg_compiler_barrier() (no
code gen, just prevent reordering), not pg_pg_memory_barrier(), no?

Agreed, arch-x86.h is quite clear on that.

Perhaps I'm missing something but I suspect we might be failing to
include arch-x86.h on that compiler when we should... maybe it needs
to detect _M_AMD64 too?

Aha, yes I think that's it. Apparently, __x86_64__ is not defined on
MSVC. To prove that, I added garbage to the "#ifdef __x86_64__" guarded
block in atomics.h. The compilation passes on MSVC, but not on other
platforms: https://cirrus-ci.com/build/6310061188841472.

That means that we're not getting the x86-64 instructions in
src/port/pg_crc32c_sse42.c on MSVC either.

I think we should do:

#ifdef _M_AMD64
#define __x86_64__
#endif

somewhere, perhaps in src/include/port/win32.h.

--
Heikki Linnakangas
Neon (https://neon.tech)

#25Thomas Munro
thomas.munro@gmail.com
In reply to: Heikki Linnakangas (#24)
1 attachment(s)
Re: Remove last traces of HPPA support

On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 30/07/2024 00:50, Thomas Munro wrote:

On Wed, Jul 3, 2024 at 8:09 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Thomas Munro <thomas.munro@gmail.com> writes:

OK, <stdatomic.h> part on ice for now. Here's an update of the rest,
this time also removing the barrier fallbacks as discussed in the LTO
thread[1].

Looks good to me.

Thanks. I'll wait just a bit longer to see if anyone else has comments.

Perhaps I'm missing something but I suspect we might be failing to
include arch-x86.h on that compiler when we should... maybe it needs
to detect _M_AMD64 too?

Aha, yes I think that's it. Apparently, __x86_64__ is not defined on
MSVC. To prove that, I added garbage to the "#ifdef __x86_64__" guarded
block in atomics.h. The compilation passes on MSVC, but not on other
platforms: https://cirrus-ci.com/build/6310061188841472.

That means that we're not getting the x86-64 instructions in
src/port/pg_crc32c_sse42.c on MSVC either.

I think we should do:

#ifdef _M_AMD64
#define __x86_64__
#endif

somewhere, perhaps in src/include/port/win32.h.

Hmm. I had come up with the opposite solution, because we already
tested for _M_AMD64 explicitly elsewhere, and also I was thinking we
would back-patch, and I don't want to cause problems for external code
that thinks that __x86_64__ implies it can bust out some GCC inline
assembler or something. But I don't have a strong opinion, your idea
is certainly simpler to implement and I also wouldn't mind much if we
just fixed it in master only, for fear of subtle breakage...

Same problem probably exists for i386. I don't think CI, build farm
or the EDB packaging team do 32 bit Windows, so that makes it a little
hard to know if your blind code changes have broken or fixed
anything... on the other hand it's pretty simple...

I wondered if the pre-Meson system might have somehow defined
__x86_64__, but I'm not seeing it. Commit b64d92f1a56 explicitly
mentions that it was tested on MSVC, so I guess maybe it was just
always "working" but not quite taking the intended code paths? Funny
though, that code that calls _mm_pause() on AMD64 or the __asm thing
that only works on i386 doesn't look like blind code to me. Curious.

Attachments:

0001-Fix-x86-architecture-detection-on-MSVC.patchtext/x-patch; charset=US-ASCII; name=0001-Fix-x86-architecture-detection-on-MSVC.patchDownload
From 47a8445c946e3792247fcf818c6e60ae72693f5c Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 30 Jul 2024 11:01:26 +1200
Subject: [PATCH] Fix x86 architecture detection on MSVC.

We were looking for __x86_64__, but MSVC calls it _M_AMD64.  Therefore
we were mapping pg_{read,write}_barrier() to expensive
pg_memory_barrier() instead of pg_compiler_barrier(), and not using the
intended spinlock delay primitive.  A couple of other places missed it
as well.

The problem probably exists for _M_IX86 (32 bit) too; this is untested
due to lack of 32 bit Windows CI, but that macro was already used in our
tree so it seems safe to use it in new places.

Back-patch to all supported releases.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com
---
 contrib/pgcrypto/crypt-blowfish.c   | 4 ++--
 src/include/port/atomics.h          | 3 ++-
 src/include/port/atomics/arch-x86.h | 2 +-
 src/port/pg_crc32c_sse42.c          | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c
index 5a1b1e1009..c34e66b2f7 100644
--- a/contrib/pgcrypto/crypt-blowfish.c
+++ b/contrib/pgcrypto/crypt-blowfish.c
@@ -38,10 +38,10 @@
 #include "px-crypt.h"
 #include "px.h"
 
-#ifdef __i386__
+#if defined(__i386__) || defined(_M_IX86)
 #define BF_ASM				0	/* 1 */
 #define BF_SCALE			1
-#elif defined(__x86_64__)
+#elif defined(__x86_64__) || defined(_M_AMD64)
 #define BF_ASM				0
 #define BF_SCALE			1
 #else
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index f6fa432d2d..ec59745168 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -65,7 +65,8 @@
  */
 #if defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #include "port/atomics/arch-arm.h"
-#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#elif defined(__i386__) || defined(__i386) || defined(_M_IX86) || \
+	  defined(__x86_64__) || defined(_M_AMD64)
 #include "port/atomics/arch-x86.h"
 #elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
 #include "port/atomics/arch-ppc.h"
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index 2a8eca30fc..4ecf540d12 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -113,7 +113,7 @@ pg_spin_delay_impl(void)
 {
 	__asm__ __volatile__(" rep; nop			\n");
 }
-#elif defined(_MSC_VER) && defined(__x86_64__)
+#elif defined(_MSC_VER) && defined(_M_AMD64)
 #define PG_HAVE_SPIN_DELAY
 static __forceinline void
 pg_spin_delay_impl(void)
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 7f88c11480..9a87070853 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -32,7 +32,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 	 * and performance testing didn't show any performance gain from aligning
 	 * the begin address.
 	 */
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_AMD64)
 	while (p + 8 <= pend)
 	{
 		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
-- 
2.39.2

#26Thomas Munro
thomas.munro@gmail.com
In reply to: Thomas Munro (#25)
Re: Remove last traces of HPPA support

On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <thomas.munro@gmail.com> wrote:

On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinnaka@iki.fi> wrote:

Looks good to me.

Thanks. I'll wait just a bit longer to see if anyone else has comments.

And pushed.

I am aware of a couple of build farm animals that will now fail
because they deliberately test --disable-spinlocks: francolin and
rorqual, which will need adjustment or retirement on master. I'll
watch out for other surprises on the farm...

#27Thomas Munro
thomas.munro@gmail.com
In reply to: Thomas Munro (#23)
2 attachment(s)
Re: Remove last traces of HPPA support

On Tue, Jul 30, 2024 at 9:50 AM Thomas Munro <thomas.munro@gmail.com> wrote:

I guess we should also consider reimplementing the spinlock on the
atomic API, but I can see that Andres is poking at spinlock code right
now so I'll keep out of his way...

Here is a first attempt at that. I haven't compared the generated asm
yet, but it seems to work OK. I solved some mysteries (or probably
just rediscovered things that others already knew) along the way:

1. The reason we finished up with OK-looking MSVC atomics code that
was probably never actually reachable might be that it was
copied-and-pasted from the spinlock code. This patch de-duplicates
that (and much more).

2. The pg_atomic_unlocked_test_flag() function was surprising to me:
it returns true if it's not currently set (according to a relaxed
load). Most of this patch was easy, but figuring out that I had
reverse polarity here was a multi-coffee operation :-) I can't call
it wrong though, as it's not based on <stdatomic.h>, and it's clearly
documented, so *shrug*.

3. As for why we have a function that <stdatomic.h> doesn't, I
speculate that it might have been intended for implementing this exact
patch, ie wanting to perform that relaxed load while spinning as
recommended by Intel. (If we strictly had to use <stdatomic.h>
functions, we couldn't use atomic_flag due to the lack of a relaxed
load operation on that type, so we'd probably have to use atomic_char
instead. Perhaps one day we will cross that bridge.)

4. Another reason would be that you need it to implement
SpinLockFree() and S_LOCK_FREE(). They don't seem to have had any
real callers since the beginning of open source PostgreSQL!, except
for a test of limited value in a new world without ports developing
their own spinlock code. Let's remove them! I see this was already
threatened by Andres in 3b37a6de.

Archeological notes: I went back further and found that POSTGRES 4.2
used them only twice for assertions. These S_LOCK() etc interfaces
seem to derive from Dynix's parallel programming library, but it
didn't have S_LOCK_FREE() either. It looks like the Berkeley guys
added _FREE() for *internal* use when dealing with PA-RISC, where free
spinlocks were non-zero, but we later developed a different way of
dealing with that.

Attachments:

0001-Use-atomics-API-to-implement-spinlocks.patchtext/x-patch; charset=US-ASCII; name=0001-Use-atomics-API-to-implement-spinlocks.patchDownload
From a5378bc1c54e4ebe726f1b43b810734c55121a0f Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 31 Jul 2024 13:11:35 +1200
Subject: [PATCH 1/2] Use atomics API to implement spinlocks.

Since our spinlock API pre-dates our C11-style atomics API by decades,
it had its own hand-crafted operations written in assembler.  Use the
atomics API instead, to simplify and de-duplicate.  We couldn't have
done this earlier, because that'd be circular: atomics were simulated
with spinlocks in --disable-atomics builds.  Commit 81385261 removed
that option, so now we can delete most of the system-specific spinlock code
and just redirect everything to pg_atomic_flag.

The main special knowledge embodied in the hand-crafted code was the
relaxed load of the lock value before attempting to test-and-set, while
spinning.  That is retained in simplified form in the new coding.
---
 configure                              |  22 -
 configure.ac                           |  19 -
 src/Makefile.global.in                 |   3 -
 src/backend/port/Makefile              |  12 -
 src/backend/port/meson.build           |   2 +-
 src/backend/port/tas/dummy.s           |   0
 src/backend/port/tas/sunstudio_sparc.s |  53 --
 src/backend/port/tas/sunstudio_x86.s   |  43 --
 src/backend/storage/lmgr/s_lock.c      | 126 +----
 src/include/storage/s_lock.h           | 671 +------------------------
 src/template/linux                     |  15 -
 src/template/solaris                   |  15 -
 src/test/regress/regress.c             |  18 -
 13 files changed, 33 insertions(+), 966 deletions(-)
 delete mode 100644 src/backend/port/tas/dummy.s
 delete mode 100644 src/backend/port/tas/sunstudio_sparc.s
 delete mode 100644 src/backend/port/tas/sunstudio_x86.s

diff --git a/configure b/configure
index 8f684f7945e..e2267837b7d 100755
--- a/configure
+++ b/configure
@@ -731,7 +731,6 @@ PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
 DLSUFFIX
-TAS
 GCC
 CPP
 CFLAGS_SL
@@ -3021,12 +3020,6 @@ $as_echo "$template" >&6; }
 PORTNAME=$template
 
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -7770,20 +7763,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}"
-
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-
-
 
 cat >>confdefs.h <<_ACEOF
 #define DLSUFFIX "$DLSUFFIX"
@@ -19924,7 +19903,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;;
     "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;;
     "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;;
     "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;;
diff --git a/configure.ac b/configure.ac
index 75b73532fe0..59c3b7e3d35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,12 +95,6 @@ AC_MSG_RESULT([$template])
 PORTNAME=$template
 AC_SUBST(PORTNAME)
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -740,19 +734,6 @@ AC_PROG_CPP
 AC_SUBST(GCC)
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}])
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-AC_SUBST(TAS)
-
 AC_SUBST(DLSUFFIX)dnl
 AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"],
                    [Define to the file name extension of dynamically-loadable modules.])
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 83b91fe9167..0301f463027 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -771,9 +771,6 @@ ifeq ($(PORTNAME),win32)
 LIBS += -lws2_32
 endif
 
-# Not really standard libc functions, used by the backend.
-TAS         = @TAS@
-
 
 ##########################################################################
 #
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile
index 47338d99229..8613ac01aff 100644
--- a/src/backend/port/Makefile
+++ b/src/backend/port/Makefile
@@ -22,7 +22,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	$(TAS) \
 	atomics.o \
 	pg_sema.o \
 	pg_shmem.o
@@ -33,16 +32,5 @@ endif
 
 include $(top_srcdir)/src/backend/common.mk
 
-tas.o: tas.s
-ifeq ($(SUN_STUDIO_CC), yes)
-# preprocess assembler file with cpp
-	$(CC) $(CFLAGS) -c -P $<
-	mv $*.i $*_cpp.s
-	$(CC) $(CFLAGS) -c $*_cpp.s -o $@
-else
-	$(CC) $(CFLAGS) -c $<
-endif
-
 clean:
-	rm -f tas_cpp.s
 	$(MAKE) -C win32 clean
diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build
index 7820e86016d..3270ffb7030 100644
--- a/src/backend/port/meson.build
+++ b/src/backend/port/meson.build
@@ -30,4 +30,4 @@ if host_system == 'windows'
 endif
 
 # autoconf generates the file there, ensure we get a conflict
-generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c', 'tas.s']}
+generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c']}
diff --git a/src/backend/port/tas/dummy.s b/src/backend/port/tas/dummy.s
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s
deleted file mode 100644
index 3400713afd5..00000000000
--- a/src/backend/port/tas/sunstudio_sparc.s
+++ /dev/null
@@ -1,53 +0,0 @@
-!-------------------------------------------------------------------------
-!
-! sunstudio_sparc.s
-!	  compare and swap for Sun Studio on Sparc
-!
-! Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-! Portions Copyright (c) 1994, Regents of the University of California
-!
-! IDENTIFICATION
-!	  src/backend/port/tas/sunstudio_sparc.s
-!
-!-------------------------------------------------------------------------
-
-! Fortunately the Sun compiler can process cpp conditionals with -P
-
-! '/' is the comment for x86, while '!' is the comment for Sparc
-
-#if defined(__sparcv9) || defined(__sparc)
-
-	.section        ".text"
-	.align  8
-	.skip   24
-	.align  4
-
-	.global pg_atomic_cas
-pg_atomic_cas:
-
-	! "cas" only works on sparcv9 and sparcv8plus chips, and
-	! requires a compiler targeting these CPUs.  It will fail
-	! on a compiler targeting sparcv8, and of course will not
-	! be understood by a sparcv8 CPU.  gcc continues to use
-	! "ldstub" because it targets sparcv7.
-	!
-	! There is actually a trick for embedding "cas" in a
-	! sparcv8-targeted compiler, but it can only be run
-	! on a sparcv8plus/v9 cpus:
-	!
-	!   http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il
-	!
-	! NB: We're assuming we're running on a TSO system here - solaris
-	! userland luckily always has done so.
-
-#if defined(__sparcv9) || defined(__sparcv8plus)
-	cas     [%o0],%o2,%o1
-#else
-	ldstub [%o0],%o1
-#endif
-	mov     %o1,%o0
-	retl
-	nop
-	.type   pg_atomic_cas,2
-	.size   pg_atomic_cas,(.-pg_atomic_cas)
-#endif
diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s
deleted file mode 100644
index b4608a9ceb2..00000000000
--- a/src/backend/port/tas/sunstudio_x86.s
+++ /dev/null
@@ -1,43 +0,0 @@
-/-------------------------------------------------------------------------
-/
-/ sunstudio_x86.s
-/	  compare and swap for Sun Studio on x86
-/
-/ Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-/ Portions Copyright (c) 1994, Regents of the University of California
-/
-/ IDENTIFICATION
-/	  src/backend/port/tas/sunstudio_x86.s
-/
-/-------------------------------------------------------------------------
-
-/ Fortunately the Sun compiler can process cpp conditionals with -P
-
-/ '/' is the comment for x86, while '!' is the comment for Sparc
-
-	.file   "tas.s"
-
-#if defined(__amd64)
-	.code64
-#endif
-
-	.globl pg_atomic_cas
-	.type pg_atomic_cas, @function
-
-	.section .text, "ax"
-	.align 16
-
-pg_atomic_cas:
-#if defined(__amd64)
-	movl       %edx,%eax
-	lock
-	cmpxchgl   %esi,(%rdi)
-#else
-	movl    4(%esp), %edx
-	movl    8(%esp), %ecx
-	movl    12(%esp), %eax
-	lock
-	cmpxchgl %ecx, (%edx)
-#endif
-	ret
-	.size pg_atomic_cas, . - pg_atomic_cas
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65dba..4acc944b1de 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -93,7 +93,7 @@ s_lock_stuck(const char *file, int line, const char *func)
 }
 
 /*
- * s_lock(lock) - platform-independent portion of waiting for a spinlock.
+ * s_lock(lock) - out-of-line portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
@@ -102,8 +102,27 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 
 	init_spin_delay(&delayStatus, file, line, func);
 
-	while (TAS_SPIN(lock))
+	for (;;)
 	{
+		bool		probably_free = true;
+
+#if defined(__i386__) || defined(__x86_64__) || \
+	defined(_M_IX86) || defined(_M_AMD64) || \
+	defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__) \
+
+
+		/*
+		 * On these architectures, it is known to be more efficient to test
+		 * the lock with a relaxed load first, while spinning.
+		 */
+		probably_free = pg_atomic_unlocked_test_flag(lock);
+#endif
+
+		/* Try to get the lock. */
+		if (probably_free && pg_atomic_test_set_flag(lock))
+			break;
+
 		perform_spin_delay(&delayStatus);
 	}
 
@@ -112,14 +131,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 	return delayStatus.delays;
 }
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-	*lock = 0;
-}
-#endif
-
 /*
  * Wait while spinning on a contended spinlock.
  */
@@ -127,7 +138,7 @@ void
 perform_spin_delay(SpinDelayStatus *status)
 {
 	/* CPU-specific delay each time through the loop */
-	SPIN_DELAY();
+	pg_spin_delay();
 
 	/* Block the process every spins_per_delay tries */
 	if (++(status->spins) >= spins_per_delay)
@@ -230,96 +241,3 @@ update_spins_per_delay(int shared_spins_per_delay)
 	 */
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
-
-
-/*****************************************************************************/
-#if defined(S_LOCK_TEST)
-
-/*
- * test program for verifying a port's spinlock support.
- */
-
-struct test_lock_struct
-{
-	char		pad1;
-	slock_t		lock;
-	char		pad2;
-};
-
-volatile struct test_lock_struct test_lock;
-
-int
-main()
-{
-	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
-
-	test_lock.pad1 = test_lock.pad2 = 0x44;
-
-	S_INIT_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not initialized\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not locked\n");
-		return 1;
-	}
-
-	S_UNLOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not unlocked\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not re-locked\n");
-		return 1;
-	}
-
-	printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
-	printf("             exit with a 'stuck spinlock' message\n");
-	printf("             if S_LOCK() and TAS() are working.\n");
-	fflush(stdout);
-
-	s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
-
-	printf("S_LOCK_TEST: failed, lock not locked\n");
-	return 1;
-}
-
-#endif							/* S_LOCK_TEST */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index e94ed5f48bd..40e2d62ef38 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -6,8 +6,9 @@
  *	NOTE: none of the macros in this file are intended to be called directly.
  *	Call them through the macros in spin.h.
  *
- *	The following hardware-dependent macros must be provided for each
- *	supported platform:
+ *	In Berkeley POSTGRES, these began as hand-crafted emulations of system
+ *	interfaces from the Sequent Dynix operating system, but now map to our
+ *	C11-style atomics API.
  *
  *	void S_INIT_LOCK(slock_t *lock)
  *		Initialize a spinlock (to the unlocked state).
@@ -28,56 +29,6 @@
  *	void SPIN_DELAY(void)
  *		Delay operation to occur inside spinlock wait loop.
  *
- *	Note to implementors: there are default implementations for all these
- *	macros at the bottom of the file.  Check if your platform can use
- *	these or needs to override them.
- *
- *  Usually, S_LOCK() is implemented in terms of even lower-level macros
- *	TAS() and TAS_SPIN():
- *
- *	int TAS(slock_t *lock)
- *		Atomic test-and-set instruction.  Attempt to acquire the lock,
- *		but do *not* wait.	Returns 0 if successful, nonzero if unable
- *		to acquire the lock.
- *
- *	int TAS_SPIN(slock_t *lock)
- *		Like TAS(), but this version is used when waiting for a lock
- *		previously found to be contended.  By default, this is the
- *		same as TAS(), but on some architectures it's better to poll a
- *		contended lock using an unlocked instruction and retry the
- *		atomic test-and-set only when it appears free.
- *
- *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *	directly.
- *
- *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *	failure to acquire a lock even when the lock is not locked.  For example,
- *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *	always be used, even if you are certain the lock is free.
- *
- *	It is the responsibility of these macros to make sure that the compiler
- *	does not re-order accesses to shared memory to precede the actual lock
- *	acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *	was the caller's responsibility, which meant that callers had to use
- *	volatile-qualified pointers to refer to both the spinlock itself and the
- *	shared data being accessed within the spinlocked critical section.  This
- *	was notationally awkward, easy to forget (and thus error-prone), and
- *	prevented some useful compiler optimizations.  For these reasons, we
- *	now require that the macros themselves prevent compiler re-ordering,
- *	so that the caller doesn't need to take special precautions.
- *
- *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- *	S_UNLOCK() macros must further include hardware-level memory fence
- *	instructions to prevent similar re-ordering at the hardware level.
- *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- *	the macro are not executed until the lock has been obtained.  Conversely,
- *	S_UNLOCK() must guarantee that loads and stores issued before the macro
- *	have been executed before the lock is released.
- *
- *	On most supported platforms, TAS() uses a tas() function written
- *	in assembly language to execute a hardware atomic-test-and-set
- *	instruction.  Equivalent OS-supplied mutex routines could be used too.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -93,617 +44,15 @@
 #error "s_lock.h may not be included from frontend code"
 #endif
 
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-/*
- * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- *
- * We use the int-width variant of the builtin because it works on more chips
- * than other widths.
- */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
-#ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
-#if defined(__aarch64__)
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	__asm__ __volatile__(
-		" isb;				\n");
-}
-
-#endif	 /* __aarch64__ */
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-/*
- * Solaris has always run sparc processors in TSO (total store) mode, but
- * linux didn't use to and the *BSDs still don't. So, be careful about
- * acquire/release semantics. The CPU will treat superfluous members as
- * NOPs, so it's just code space.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res;
-
-	/*
-	 *	See comment in src/backend/port/tas/sunstudio_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-#if defined(__sparcv7) || defined(__sparc_v7__)
-	/*
-	 * No stbar or membar available, luckily no actually produced hardware
-	 * requires a barrier.
-	 */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-	/* stbar is available (and required for both PSO, RMO), membar isn't */
-	__asm__ __volatile__ ("stbar	 \n":::"memory");
-#else
-	/*
-	 * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
-	 * barrier for sparcv8+ upwards.
-	 */
-	__asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
-#endif
-	return (int) _res;
-}
-
-#if defined(__sparcv7) || defined(__sparc_v7__)
-/*
- * No stbar or membar available, luckily no actually produced hardware
- * requires a barrier.  We fall through to the default gcc definition of
- * S_UNLOCK in this case.
- */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-/* stbar is available (and required for both PSO, RMO), membar isn't */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("stbar	 \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-/*
- * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
- * release barrier for sparcv8+ upwards.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * The second operand of addi can hold a constant zero or a register number,
- * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
- * base register"; most operands having this register-or-zero property are
- * address bases, e.g. the second operand of lwax.
- *
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-"	lwarx   %0,0,%3,1	\n"
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f			\n"
-"1: \n"
-"	li      %1,1		\n"
-"	b       3f			\n"
-"2: \n"
-"	lwsync				\n"
-"	li      %1,0		\n"
-"3: \n"
-:	"=&b"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* powerpc */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * Original MIPS-I processors lacked the LL/SC instructions, but if we are
- * so unfortunate as to be running on one of those, we expect that the kernel
- * will handle the illegal-instruction traps and emulate them for us.  On
- * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
- * choice because any other synchronization method must involve a kernel
- * call.  Unfortunately, many toolchains still default to MIPS-I as the
- * codegen target; if the symbol __mips shows that that's the case, we
- * have to force the assembler to accept LL/SC.
- *
- * R10000 and up processors require a separate SYNC, which has the same
- * issues as LL/SC.
- */
-#if __mips < 2
-#define MIPS_SET_MIPS2	"       .set mips2          \n"
-#else
-#define MIPS_SET_MIPS2
-#endif
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile slock_t *_l = lock;
-	int			_res;
-	int			_tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		MIPS_SET_MIPS2
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:		/* no inputs */
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		MIPS_SET_MIPS2 \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              " \
-:		/* no outputs */ \
-:		/* no inputs */	\
-:		"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-
-/*
- * If we have no platform-specific knowledge, but we found that the compiler
- * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
- * version over the char-width version if we have both, on the rather dubious
- * grounds that that's known to be more likely to work in the ARM ecosystem.
- * (But we dealt with ARM above.)
- */
-#if !defined(HAS_TEST_AND_SET)
-
-#if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/*
- * Default implementation of S_UNLOCK() for gcc/icc.
- *
- * Note that this implementation is unsafe for any platform that can reorder
- * a memory access (either load or store) after a following store.  That
- * happens not to be possible on x86 and most legacy architectures (some are
- * single-processor!), but many modern systems have weaker memory ordering.
- * Those that do must define their own version of S_UNLOCK() rather than
- * relying on this one.
- */
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
-#endif
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-/* These are in sunstudio_(sparc|x86).s */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef _MSC_VER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-
-#define S_UNLOCK(lock)	\
-	do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-bugs@lists.postgresql.org.
-#endif
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
-#define S_LOCK(lock) \
-	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-/*
- * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
- * is unsafe if the platform can reorder a memory access (either load or
- * store) after a following store; platforms where this is possible must
- * define their own S_UNLOCK.  But CPU reordering is not the only concern:
- * if we simply defined S_UNLOCK() as an inline macro, the compiler might
- * reorder instructions from inside the critical section to occur after the
- * lock release.  Since the compiler probably can't know what the external
- * function s_unlock is doing, putting the same logic there should be adequate.
- * A sufficiently-smart globally optimizing compiler could break that
- * assumption, though, and the cost of a function call for every spinlock
- * release may hurt performance significantly, so we use this implementation
- * only for platforms where we don't know of a suitable intrinsic.  For the
- * most part, those are relatively obscure platform/compiler combinations to
- * which the PostgreSQL project does not have access.
- */
-#define USE_DEFAULT_S_UNLOCK
-extern void s_unlock(volatile slock_t *lock);
-#define S_UNLOCK(lock)		s_unlock(lock)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
+#include "port/atomics.h"
 
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
+typedef pg_atomic_flag slock_t;
 
+#define S_INIT_LOCK(lock) pg_atomic_init_flag(lock)
+#define S_LOCK(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
+#define S_UNLOCK(lock) pg_atomic_clear_flag(lock)
+#define S_LOCK_FREE(lock) (pg_atomic_unlocked_test_flag(lock))
+#define SPIN_DELAY() pg_spin_delay()
 
 /*
  * Platform-independent out-of-line support routines
diff --git a/src/template/linux b/src/template/linux
index ec3302c4a22..2f04c1a6610 100644
--- a/src/template/linux
+++ b/src/template/linux
@@ -21,19 +21,4 @@ if test "$SUN_STUDIO_CC" = "yes" ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/template/solaris b/src/template/solaris
index f88b1cdad37..f5306b3dd5b 100644
--- a/src/template/solaris
+++ b/src/template/solaris
@@ -13,19 +13,4 @@ if test "$SUN_STUDIO_CC" = yes ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6e..188dd6c9d69 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -860,24 +860,6 @@ test_spinlock(void)
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
 		S_UNLOCK(&struct_w_lock.lock);
 
-		/*
-		 * Check, using TAS directly, that a single spin cycle doesn't block
-		 * when acquiring an already acquired lock.
-		 */
-#ifdef TAS
-		S_LOCK(&struct_w_lock.lock);
-
-		if (!TAS(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-
-#ifdef TAS_SPIN
-		if (!TAS_SPIN(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-#endif							/* defined(TAS_SPIN) */
-
-		S_UNLOCK(&struct_w_lock.lock);
-#endif							/* defined(TAS) */
-
 		/*
 		 * Verify that after all of this the non-lock contents are still
 		 * correct.
-- 
2.45.2

0002-Remove-SpinLockFree.patchtext/x-patch; charset=US-ASCII; name=0002-Remove-SpinLockFree.patchDownload
From 8648b1549eabd62725f0013825667fb5d00ec453 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 31 Jul 2024 17:17:36 +1200
Subject: [PATCH 2/2] Remove SpinLockFree().

This interface has been unused for a long time, except in a test that is
now gone.
---
 src/include/storage/s_lock.h | 5 -----
 src/include/storage/spin.h   | 6 ------
 2 files changed, 11 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 40e2d62ef38..b0fcfce16cf 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -22,10 +22,6 @@
  *	void S_UNLOCK(slock_t *lock)
  *		Unlock a previously acquired lock.
  *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
  *	void SPIN_DELAY(void)
  *		Delay operation to occur inside spinlock wait loop.
  *
@@ -51,7 +47,6 @@ typedef pg_atomic_flag slock_t;
 #define S_INIT_LOCK(lock) pg_atomic_init_flag(lock)
 #define S_LOCK(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
 #define S_UNLOCK(lock) pg_atomic_clear_flag(lock)
-#define S_LOCK_FREE(lock) (pg_atomic_unlocked_test_flag(lock))
 #define SPIN_DELAY() pg_spin_delay()
 
 /*
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 3ae2a56d073..d4dea2a98ff 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -18,10 +18,6 @@
  *	void SpinLockRelease(volatile slock_t *lock)
  *		Unlock a previously acquired lock.
  *
- *	bool SpinLockFree(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
  *	Callers must beware that the macro argument may be evaluated multiple
  *	times!
  *
@@ -60,6 +56,4 @@
 
 #define SpinLockRelease(lock) S_UNLOCK(lock)
 
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
-
 #endif							/* SPIN_H */
-- 
2.45.2

#28Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Thomas Munro (#27)
Re: Remove last traces of HPPA support

On 31/07/2024 08:52, Thomas Munro wrote:

On Tue, Jul 30, 2024 at 9:50 AM Thomas Munro <thomas.munro@gmail.com> wrote:

I guess we should also consider reimplementing the spinlock on the
atomic API, but I can see that Andres is poking at spinlock code right
now so I'll keep out of his way...

Here is a first attempt at that.

Looks good, thanks!

I haven't compared the generated asm yet, but it seems to work OK.

The old __i386__ implementation of TAS() said:

* When this was last tested, we didn't have separate TAS() and TAS_SPIN()
* macros. Nowadays it probably would be better to do a non-locking test
* in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
* testing to verify that. Without some empirical evidence, better to
* leave it alone.

It seems that you did what the comment suggested. That seems fine. For
sake of completeness, if someone has an i386 machine lying around, it
would be nice to verify that. Or an official CPU manufacturer's
implementation guide, or references to other implementations or something.

2. The pg_atomic_unlocked_test_flag() function was surprising to me:
it returns true if it's not currently set (according to a relaxed
load). Most of this patch was easy, but figuring out that I had
reverse polarity here was a multi-coffee operation :-) I can't call
it wrong though, as it's not based on <stdatomic.h>, and it's clearly
documented, so *shrug*.

Huh, yeah that's unexpected.

3. As for why we have a function that <stdatomic.h> doesn't, I
speculate that it might have been intended for implementing this exact
patch, ie wanting to perform that relaxed load while spinning as
recommended by Intel. (If we strictly had to use <stdatomic.h>
functions, we couldn't use atomic_flag due to the lack of a relaxed
load operation on that type, so we'd probably have to use atomic_char
instead. Perhaps one day we will cross that bridge.)

As a side note, I remember when I've tried to use pg_atomic_flag in the
past, I wanted to do an atomic compare-and-exchange on it, to clear the
value and return the old value. Surprisingly, there's no function to do
that. There's pg_atomic_test_set_flag(), but no
pg_atomic_test_clear_flag(). C11 has both "atomic_flag" and
"atomic_bool", and I guess what I actually wanted was atomic_bool.

- * On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- * S_UNLOCK() macros must further include hardware-level memory fence
- * instructions to prevent similar re-ordering at the hardware level.
- * TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- * the macro are not executed until the lock has been obtained. Conversely,
- * S_UNLOCK() must guarantee that loads and stores issued before the macro
- * have been executed before the lock is released.

That old comment means that both SpinLockAcquire() and SpinLockRelease()
acted as full memory barriers, and looking at the implementations, that
was indeed so. With the new implementation, SpinLockAcquire() will have
"acquire semantics" and SpinLockRelease will have "release semantics".
That's very sensible, and I don't believe it will break anything, but
it's a change in semantics nevertheless.

--
Heikki Linnakangas
Neon (https://neon.tech)

#29Thomas Munro
thomas.munro@gmail.com
In reply to: Heikki Linnakangas (#28)
Re: Remove last traces of HPPA support

On Wed, Jul 31, 2024 at 8:47 PM Heikki Linnakangas <hlinnaka@iki.fi> wrote:

On 31/07/2024 08:52, Thomas Munro wrote:
The old __i386__ implementation of TAS() said:

* When this was last tested, we didn't have separate TAS() and TAS_SPIN()
* macros. Nowadays it probably would be better to do a non-locking test
* in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
* testing to verify that. Without some empirical evidence, better to
* leave it alone.

It seems that you did what the comment suggested. That seems fine. For
sake of completeness, if someone has an i386 machine lying around, it
would be nice to verify that. Or an official CPU manufacturer's
implementation guide, or references to other implementations or something.

Hmm, the last "real" 32 bit CPU is from ~20 years ago. Now the only
32 bit x86 systems we should nominally care about are modern CPUs that
can also run 32 bit instructions; is there a reason to think they'd
behave differently at this level? Looking at the current Intel
optimisation guide's discussion of spinlock implementation at page
2-34 of [1], it doesn't distinguish between 32 and 64, and it has that
double-check thing.

- * On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- * S_UNLOCK() macros must further include hardware-level memory fence
- * instructions to prevent similar re-ordering at the hardware level.
- * TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- * the macro are not executed until the lock has been obtained. Conversely,
- * S_UNLOCK() must guarantee that loads and stores issued before the macro
- * have been executed before the lock is released.

That old comment means that both SpinLockAcquire() and SpinLockRelease()
acted as full memory barriers, and looking at the implementations, that
was indeed so. With the new implementation, SpinLockAcquire() will have
"acquire semantics" and SpinLockRelease will have "release semantics".
That's very sensible, and I don't believe it will break anything, but
it's a change in semantics nevertheless.

Yeah. It's interesting that our pg_atomic_clear_flag(f) is like
standard atomic_flag_clear_explicit(f, memory_order_release), not like
atomic_flag_clear(f) which is short for atomic_flag_clear_explicit(f,
memory_order_seq_cst). Example spinlock code I've seen written in
modern C or C++ therefore uses the _explicit variants, so it can get
acquire/release, which is what people usually want from a lock-like
thing. What's a good way to test the performance in PostgreSQL? In a
naive loop that just test-and-sets and clears a flag a billion times
in a loop and does nothing else, I see 20-40% performance increase
depending on architecture when comparing _seq_cst with
_acquire/_release. You're right that this semantic change deserves
explicit highlighting, in comments somewhere... I wonder if we have
anywhere that is counting on the stronger barrier...

[1]: https://www.intel.com/content/www/us/en/content-details/671488/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html

#30Andres Freund
andres@anarazel.de
In reply to: Thomas Munro (#27)
Re: Remove last traces of HPPA support

Hi,

On 2024-07-31 17:52:34 +1200, Thomas Munro wrote:

2. The pg_atomic_unlocked_test_flag() function was surprising to me:
it returns true if it's not currently set (according to a relaxed
load). Most of this patch was easy, but figuring out that I had
reverse polarity here was a multi-coffee operation :-) I can't call
it wrong though, as it's not based on <stdatomic.h>, and it's clearly
documented, so *shrug*.

I have no idea why I did it that way round. This was a long time ago...

4. Another reason would be that you need it to implement
SpinLockFree() and S_LOCK_FREE(). They don't seem to have had any
real callers since the beginning of open source PostgreSQL!, except
for a test of limited value in a new world without ports developing
their own spinlock code. Let's remove them! I see this was already
threatened by Andres in 3b37a6de.

Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
SpinLockRelease():
/messages/by-id/20240729182952.hua325647e2ggbsy@awork3.anarazel.de

Greetings,

Andres Freund

#31Andres Freund
andres@anarazel.de
In reply to: Thomas Munro (#26)
Re: Remove last traces of HPPA support

Hi,

On 2024-07-30 23:08:36 +1200, Thomas Munro wrote:

On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <thomas.munro@gmail.com> wrote:

On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinnaka@iki.fi> wrote:

Looks good to me.

Thanks. I'll wait just a bit longer to see if anyone else has comments.

And pushed.

Yay!

I am aware of a couple of build farm animals that will now fail
because they deliberately test --disable-spinlocks: francolin and
rorqual, which will need adjustment or retirement on master. I'll
watch out for other surprises on the farm...

I've now adjusted rorqual, francolin, piculet to not run on master anymore -
they're just there to test combinations of --disable-atomics and
--disable-spinlocks, so there seems not much point in just disabling those
options for HEAD.

Greetings,

Andres Freund

#32Andres Freund
andres@anarazel.de
In reply to: Thomas Munro (#29)
Re: Remove last traces of HPPA support

Hi,

On 2024-07-31 22:32:19 +1200, Thomas Munro wrote:

That old comment means that both SpinLockAcquire() and SpinLockRelease()
acted as full memory barriers, and looking at the implementations, that
was indeed so. With the new implementation, SpinLockAcquire() will have
"acquire semantics" and SpinLockRelease will have "release semantics".
That's very sensible, and I don't believe it will break anything, but
it's a change in semantics nevertheless.

Yeah. It's interesting that our pg_atomic_clear_flag(f) is like
standard atomic_flag_clear_explicit(f, memory_order_release), not like
atomic_flag_clear(f) which is short for atomic_flag_clear_explicit(f,
memory_order_seq_cst). Example spinlock code I've seen written in
modern C or C++ therefore uses the _explicit variants, so it can get
acquire/release, which is what people usually want from a lock-like
thing. What's a good way to test the performance in PostgreSQL?

I've used
c=8;pgbench -n -Mprepared -c$c -j$c -P1 -T10 -f <(echo "SELECT pg_logical_emit_message(false, \:client_id::text, '1'), generate_series(1, 1000) OFFSET 1000;")
in the past. Because of NUM_XLOGINSERT_LOCKS = 8 this ends up with 8 backends
doing tiny xlog insertions and heavily contending on insertpos_lck.

The generate_series() is necessary as otherwise the context switch and
executor startup overhead dominates.

In a naive loop that just test-and-sets and clears a flag a billion times in
a loop and does nothing else, I see 20-40% performance increase depending on
architecture when comparing _seq_cst with _acquire/_release.

I'd expect the difference to be even bigger on concurrent workloads on x86-64
- the added memory barrier during lock release really hurts. I have a test
program to play around with this and the difference in isolation is like 0.4x
the throughput with a full barrier release on my older 2 socket workstation
[1].

On said workstation [1], with the above pgbench, I get ~1.95M inserts/sec
(1959 TPS * 1000) on HEAD and 1.80M insert/sec after adding
#define S_UNLOCK(lock) __atomic_store_n(lock, 0, __ATOMIC_SEQ_CST)

If I change NUM_XLOGINSERT_LOCKS = 40 and use 40 clients, I get
1.03M inserts/sec with the current code and 0.86M inserts/sec with
__ATOMIC_SEQ_CST.

Greetings,

Andres Freund

[1]: 2x Xeon Gold 5215

#33Thomas Munro
thomas.munro@gmail.com
In reply to: Andres Freund (#30)
2 attachment(s)
Re: Remove last traces of HPPA support

On Thu, Aug 1, 2024 at 7:07 AM Andres Freund <andres@anarazel.de> wrote:

Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
SpinLockRelease():
/messages/by-id/20240729182952.hua325647e2ggbsy@awork3.anarazel.de

What about adding a "magic" member in assertion builds? Here is my
attempt at that, in 0002.

I also realised that we might as well skip the trivial S_XXX macros
and delete s_lock.h. In this version of 0001 we retain just spin.h,
but s_lock.c still exists to hold the slow path.

Attachments:

v2-0001-Use-atomics-API-to-implement-spinlocks.patchtext/x-patch; charset=US-ASCII; name=v2-0001-Use-atomics-API-to-implement-spinlocks.patchDownload
From 47d5c4537dd741efc0fd6ac54393d2e7aca7ec8b Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 31 Jul 2024 13:11:35 +1200
Subject: [PATCH v2 1/2] Use atomics API to implement spinlocks.

Since our spinlock API pre-dates our C11-style atomics API by decades,
it had its own hand-crafted operations written in assembler.  Use the
atomics API instead, to simplify and de-duplicate.  We couldn't have
done this earlier, because that'd be circular: atomics were simulated
with spinlocks in --disable-atomics builds.  Commit 81385261 removed
that option, so now we can delete most the system-specific spinlock code
and just redirect everything to pg_atomic_flag.

The main special knowledge embodied in the hand-crafted code was the
relaxed load of the lock value before attempting to test-and-set, while
spinning.  That is retained in simplified form in the new coding.
---
 configure                              |  22 -
 configure.ac                           |  19 -
 src/Makefile.global.in                 |   3 -
 src/backend/port/Makefile              |  12 -
 src/backend/port/meson.build           |   2 +-
 src/backend/port/tas/dummy.s           |   0
 src/backend/port/tas/sunstudio_sparc.s |  53 --
 src/backend/port/tas/sunstudio_x86.s   |  43 --
 src/backend/storage/lmgr/s_lock.c      | 128 +----
 src/include/storage/s_lock.h           | 749 -------------------------
 src/include/storage/spin.h             |  60 +-
 src/template/linux                     |  15 -
 src/template/solaris                   |  15 -
 src/test/regress/regress.c             |  25 +-
 14 files changed, 75 insertions(+), 1071 deletions(-)
 delete mode 100644 src/backend/port/tas/dummy.s
 delete mode 100644 src/backend/port/tas/sunstudio_sparc.s
 delete mode 100644 src/backend/port/tas/sunstudio_x86.s
 delete mode 100644 src/include/storage/s_lock.h

diff --git a/configure b/configure
index 8f684f7945e..e2267837b7d 100755
--- a/configure
+++ b/configure
@@ -731,7 +731,6 @@ PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
 DLSUFFIX
-TAS
 GCC
 CPP
 CFLAGS_SL
@@ -3021,12 +3020,6 @@ $as_echo "$template" >&6; }
 PORTNAME=$template
 
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -7770,20 +7763,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}"
-
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-
-
 
 cat >>confdefs.h <<_ACEOF
 #define DLSUFFIX "$DLSUFFIX"
@@ -19924,7 +19903,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;;
     "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;;
     "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;;
     "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;;
diff --git a/configure.ac b/configure.ac
index 75b73532fe0..59c3b7e3d35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,12 +95,6 @@ AC_MSG_RESULT([$template])
 PORTNAME=$template
 AC_SUBST(PORTNAME)
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -740,19 +734,6 @@ AC_PROG_CPP
 AC_SUBST(GCC)
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}])
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-AC_SUBST(TAS)
-
 AC_SUBST(DLSUFFIX)dnl
 AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"],
                    [Define to the file name extension of dynamically-loadable modules.])
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 83b91fe9167..0301f463027 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -771,9 +771,6 @@ ifeq ($(PORTNAME),win32)
 LIBS += -lws2_32
 endif
 
-# Not really standard libc functions, used by the backend.
-TAS         = @TAS@
-
 
 ##########################################################################
 #
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile
index 47338d99229..8613ac01aff 100644
--- a/src/backend/port/Makefile
+++ b/src/backend/port/Makefile
@@ -22,7 +22,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	$(TAS) \
 	atomics.o \
 	pg_sema.o \
 	pg_shmem.o
@@ -33,16 +32,5 @@ endif
 
 include $(top_srcdir)/src/backend/common.mk
 
-tas.o: tas.s
-ifeq ($(SUN_STUDIO_CC), yes)
-# preprocess assembler file with cpp
-	$(CC) $(CFLAGS) -c -P $<
-	mv $*.i $*_cpp.s
-	$(CC) $(CFLAGS) -c $*_cpp.s -o $@
-else
-	$(CC) $(CFLAGS) -c $<
-endif
-
 clean:
-	rm -f tas_cpp.s
 	$(MAKE) -C win32 clean
diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build
index 7820e86016d..3270ffb7030 100644
--- a/src/backend/port/meson.build
+++ b/src/backend/port/meson.build
@@ -30,4 +30,4 @@ if host_system == 'windows'
 endif
 
 # autoconf generates the file there, ensure we get a conflict
-generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c', 'tas.s']}
+generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c']}
diff --git a/src/backend/port/tas/dummy.s b/src/backend/port/tas/dummy.s
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s
deleted file mode 100644
index 3400713afd5..00000000000
--- a/src/backend/port/tas/sunstudio_sparc.s
+++ /dev/null
@@ -1,53 +0,0 @@
-!-------------------------------------------------------------------------
-!
-! sunstudio_sparc.s
-!	  compare and swap for Sun Studio on Sparc
-!
-! Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-! Portions Copyright (c) 1994, Regents of the University of California
-!
-! IDENTIFICATION
-!	  src/backend/port/tas/sunstudio_sparc.s
-!
-!-------------------------------------------------------------------------
-
-! Fortunately the Sun compiler can process cpp conditionals with -P
-
-! '/' is the comment for x86, while '!' is the comment for Sparc
-
-#if defined(__sparcv9) || defined(__sparc)
-
-	.section        ".text"
-	.align  8
-	.skip   24
-	.align  4
-
-	.global pg_atomic_cas
-pg_atomic_cas:
-
-	! "cas" only works on sparcv9 and sparcv8plus chips, and
-	! requires a compiler targeting these CPUs.  It will fail
-	! on a compiler targeting sparcv8, and of course will not
-	! be understood by a sparcv8 CPU.  gcc continues to use
-	! "ldstub" because it targets sparcv7.
-	!
-	! There is actually a trick for embedding "cas" in a
-	! sparcv8-targeted compiler, but it can only be run
-	! on a sparcv8plus/v9 cpus:
-	!
-	!   http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il
-	!
-	! NB: We're assuming we're running on a TSO system here - solaris
-	! userland luckily always has done so.
-
-#if defined(__sparcv9) || defined(__sparcv8plus)
-	cas     [%o0],%o2,%o1
-#else
-	ldstub [%o0],%o1
-#endif
-	mov     %o1,%o0
-	retl
-	nop
-	.type   pg_atomic_cas,2
-	.size   pg_atomic_cas,(.-pg_atomic_cas)
-#endif
diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s
deleted file mode 100644
index b4608a9ceb2..00000000000
--- a/src/backend/port/tas/sunstudio_x86.s
+++ /dev/null
@@ -1,43 +0,0 @@
-/-------------------------------------------------------------------------
-/
-/ sunstudio_x86.s
-/	  compare and swap for Sun Studio on x86
-/
-/ Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-/ Portions Copyright (c) 1994, Regents of the University of California
-/
-/ IDENTIFICATION
-/	  src/backend/port/tas/sunstudio_x86.s
-/
-/-------------------------------------------------------------------------
-
-/ Fortunately the Sun compiler can process cpp conditionals with -P
-
-/ '/' is the comment for x86, while '!' is the comment for Sparc
-
-	.file   "tas.s"
-
-#if defined(__amd64)
-	.code64
-#endif
-
-	.globl pg_atomic_cas
-	.type pg_atomic_cas, @function
-
-	.section .text, "ax"
-	.align 16
-
-pg_atomic_cas:
-#if defined(__amd64)
-	movl       %edx,%eax
-	lock
-	cmpxchgl   %esi,(%rdi)
-#else
-	movl    4(%esp), %edx
-	movl    8(%esp), %ecx
-	movl    12(%esp), %eax
-	lock
-	cmpxchgl %ecx, (%edx)
-#endif
-	ret
-	.size pg_atomic_cas, . - pg_atomic_cas
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65dba..18a98b6e638 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -52,7 +52,7 @@
 
 #include "common/pg_prng.h"
 #include "port/atomics.h"
-#include "storage/s_lock.h"
+#include "storage/spin.h"
 #include "utils/wait_event.h"
 
 #define MIN_SPINS_PER_DELAY 10
@@ -93,7 +93,7 @@ s_lock_stuck(const char *file, int line, const char *func)
 }
 
 /*
- * s_lock(lock) - platform-independent portion of waiting for a spinlock.
+ * s_lock(lock) - out-of-line portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
@@ -102,8 +102,27 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 
 	init_spin_delay(&delayStatus, file, line, func);
 
-	while (TAS_SPIN(lock))
+	for (;;)
 	{
+		bool		probably_free = true;
+
+#if defined(__i386__) || defined(__x86_64__) || \
+	defined(_M_IX86) || defined(_M_AMD64) || \
+	defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__) \
+
+
+		/*
+		 * On these architectures, it is known to be more efficient to test
+		 * the lock with a relaxed load first, while spinning.
+		 */
+		probably_free = pg_atomic_unlocked_test_flag(lock);
+#endif
+
+		/* Try to get the lock. */
+		if (probably_free && pg_atomic_test_set_flag(lock))
+			break;
+
 		perform_spin_delay(&delayStatus);
 	}
 
@@ -112,14 +131,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 	return delayStatus.delays;
 }
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-	*lock = 0;
-}
-#endif
-
 /*
  * Wait while spinning on a contended spinlock.
  */
@@ -127,7 +138,7 @@ void
 perform_spin_delay(SpinDelayStatus *status)
 {
 	/* CPU-specific delay each time through the loop */
-	SPIN_DELAY();
+	pg_spin_delay();
 
 	/* Block the process every spins_per_delay tries */
 	if (++(status->spins) >= spins_per_delay)
@@ -230,96 +241,3 @@ update_spins_per_delay(int shared_spins_per_delay)
 	 */
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
-
-
-/*****************************************************************************/
-#if defined(S_LOCK_TEST)
-
-/*
- * test program for verifying a port's spinlock support.
- */
-
-struct test_lock_struct
-{
-	char		pad1;
-	slock_t		lock;
-	char		pad2;
-};
-
-volatile struct test_lock_struct test_lock;
-
-int
-main()
-{
-	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
-
-	test_lock.pad1 = test_lock.pad2 = 0x44;
-
-	S_INIT_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not initialized\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not locked\n");
-		return 1;
-	}
-
-	S_UNLOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not unlocked\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not re-locked\n");
-		return 1;
-	}
-
-	printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
-	printf("             exit with a 'stuck spinlock' message\n");
-	printf("             if S_LOCK() and TAS() are working.\n");
-	fflush(stdout);
-
-	s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
-
-	printf("S_LOCK_TEST: failed, lock not locked\n");
-	return 1;
-}
-
-#endif							/* S_LOCK_TEST */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
deleted file mode 100644
index e94ed5f48bd..00000000000
--- a/src/include/storage/s_lock.h
+++ /dev/null
@@ -1,749 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * s_lock.h
- *	   Implementation of spinlocks.
- *
- *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the macros in spin.h.
- *
- *	The following hardware-dependent macros must be provided for each
- *	supported platform:
- *
- *	void S_INIT_LOCK(slock_t *lock)
- *		Initialize a spinlock (to the unlocked state).
- *
- *	int S_LOCK(slock_t *lock)
- *		Acquire a spinlock, waiting if necessary.
- *		Time out and abort() if unable to acquire the lock in a
- *		"reasonable" amount of time --- typically ~ 1 minute.
- *		Should return number of "delays"; see s_lock.c
- *
- *	void S_UNLOCK(slock_t *lock)
- *		Unlock a previously acquired lock.
- *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
- *	void SPIN_DELAY(void)
- *		Delay operation to occur inside spinlock wait loop.
- *
- *	Note to implementors: there are default implementations for all these
- *	macros at the bottom of the file.  Check if your platform can use
- *	these or needs to override them.
- *
- *  Usually, S_LOCK() is implemented in terms of even lower-level macros
- *	TAS() and TAS_SPIN():
- *
- *	int TAS(slock_t *lock)
- *		Atomic test-and-set instruction.  Attempt to acquire the lock,
- *		but do *not* wait.	Returns 0 if successful, nonzero if unable
- *		to acquire the lock.
- *
- *	int TAS_SPIN(slock_t *lock)
- *		Like TAS(), but this version is used when waiting for a lock
- *		previously found to be contended.  By default, this is the
- *		same as TAS(), but on some architectures it's better to poll a
- *		contended lock using an unlocked instruction and retry the
- *		atomic test-and-set only when it appears free.
- *
- *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *	directly.
- *
- *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *	failure to acquire a lock even when the lock is not locked.  For example,
- *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *	always be used, even if you are certain the lock is free.
- *
- *	It is the responsibility of these macros to make sure that the compiler
- *	does not re-order accesses to shared memory to precede the actual lock
- *	acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *	was the caller's responsibility, which meant that callers had to use
- *	volatile-qualified pointers to refer to both the spinlock itself and the
- *	shared data being accessed within the spinlocked critical section.  This
- *	was notationally awkward, easy to forget (and thus error-prone), and
- *	prevented some useful compiler optimizations.  For these reasons, we
- *	now require that the macros themselves prevent compiler re-ordering,
- *	so that the caller doesn't need to take special precautions.
- *
- *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- *	S_UNLOCK() macros must further include hardware-level memory fence
- *	instructions to prevent similar re-ordering at the hardware level.
- *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- *	the macro are not executed until the lock has been obtained.  Conversely,
- *	S_UNLOCK() must guarantee that loads and stores issued before the macro
- *	have been executed before the lock is released.
- *
- *	On most supported platforms, TAS() uses a tas() function written
- *	in assembly language to execute a hardware atomic-test-and-set
- *	instruction.  Equivalent OS-supplied mutex routines could be used too.
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *	  src/include/storage/s_lock.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef S_LOCK_H
-#define S_LOCK_H
-
-#ifdef FRONTEND
-#error "s_lock.h may not be included from frontend code"
-#endif
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-/*
- * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- *
- * We use the int-width variant of the builtin because it works on more chips
- * than other widths.
- */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
-#ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
-#if defined(__aarch64__)
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	__asm__ __volatile__(
-		" isb;				\n");
-}
-
-#endif	 /* __aarch64__ */
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-/*
- * Solaris has always run sparc processors in TSO (total store) mode, but
- * linux didn't use to and the *BSDs still don't. So, be careful about
- * acquire/release semantics. The CPU will treat superfluous members as
- * NOPs, so it's just code space.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res;
-
-	/*
-	 *	See comment in src/backend/port/tas/sunstudio_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-#if defined(__sparcv7) || defined(__sparc_v7__)
-	/*
-	 * No stbar or membar available, luckily no actually produced hardware
-	 * requires a barrier.
-	 */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-	/* stbar is available (and required for both PSO, RMO), membar isn't */
-	__asm__ __volatile__ ("stbar	 \n":::"memory");
-#else
-	/*
-	 * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
-	 * barrier for sparcv8+ upwards.
-	 */
-	__asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
-#endif
-	return (int) _res;
-}
-
-#if defined(__sparcv7) || defined(__sparc_v7__)
-/*
- * No stbar or membar available, luckily no actually produced hardware
- * requires a barrier.  We fall through to the default gcc definition of
- * S_UNLOCK in this case.
- */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-/* stbar is available (and required for both PSO, RMO), membar isn't */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("stbar	 \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-/*
- * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
- * release barrier for sparcv8+ upwards.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * The second operand of addi can hold a constant zero or a register number,
- * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
- * base register"; most operands having this register-or-zero property are
- * address bases, e.g. the second operand of lwax.
- *
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-"	lwarx   %0,0,%3,1	\n"
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f			\n"
-"1: \n"
-"	li      %1,1		\n"
-"	b       3f			\n"
-"2: \n"
-"	lwsync				\n"
-"	li      %1,0		\n"
-"3: \n"
-:	"=&b"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* powerpc */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * Original MIPS-I processors lacked the LL/SC instructions, but if we are
- * so unfortunate as to be running on one of those, we expect that the kernel
- * will handle the illegal-instruction traps and emulate them for us.  On
- * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
- * choice because any other synchronization method must involve a kernel
- * call.  Unfortunately, many toolchains still default to MIPS-I as the
- * codegen target; if the symbol __mips shows that that's the case, we
- * have to force the assembler to accept LL/SC.
- *
- * R10000 and up processors require a separate SYNC, which has the same
- * issues as LL/SC.
- */
-#if __mips < 2
-#define MIPS_SET_MIPS2	"       .set mips2          \n"
-#else
-#define MIPS_SET_MIPS2
-#endif
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile slock_t *_l = lock;
-	int			_res;
-	int			_tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		MIPS_SET_MIPS2
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:		/* no inputs */
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		MIPS_SET_MIPS2 \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              " \
-:		/* no outputs */ \
-:		/* no inputs */	\
-:		"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-
-/*
- * If we have no platform-specific knowledge, but we found that the compiler
- * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
- * version over the char-width version if we have both, on the rather dubious
- * grounds that that's known to be more likely to work in the ARM ecosystem.
- * (But we dealt with ARM above.)
- */
-#if !defined(HAS_TEST_AND_SET)
-
-#if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/*
- * Default implementation of S_UNLOCK() for gcc/icc.
- *
- * Note that this implementation is unsafe for any platform that can reorder
- * a memory access (either load or store) after a following store.  That
- * happens not to be possible on x86 and most legacy architectures (some are
- * single-processor!), but many modern systems have weaker memory ordering.
- * Those that do must define their own version of S_UNLOCK() rather than
- * relying on this one.
- */
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
-#endif
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-/* These are in sunstudio_(sparc|x86).s */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef _MSC_VER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-
-#define S_UNLOCK(lock)	\
-	do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-bugs@lists.postgresql.org.
-#endif
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
-#define S_LOCK(lock) \
-	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-/*
- * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
- * is unsafe if the platform can reorder a memory access (either load or
- * store) after a following store; platforms where this is possible must
- * define their own S_UNLOCK.  But CPU reordering is not the only concern:
- * if we simply defined S_UNLOCK() as an inline macro, the compiler might
- * reorder instructions from inside the critical section to occur after the
- * lock release.  Since the compiler probably can't know what the external
- * function s_unlock is doing, putting the same logic there should be adequate.
- * A sufficiently-smart globally optimizing compiler could break that
- * assumption, though, and the cost of a function call for every spinlock
- * release may hurt performance significantly, so we use this implementation
- * only for platforms where we don't know of a suitable intrinsic.  For the
- * most part, those are relatively obscure platform/compiler combinations to
- * which the PostgreSQL project does not have access.
- */
-#define USE_DEFAULT_S_UNLOCK
-extern void s_unlock(volatile slock_t *lock);
-#define S_UNLOCK(lock)		s_unlock(lock)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
-
-/*
- * Platform-independent out-of-line support routines
- */
-extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
-
-/* Support for dynamic adjustment of spins_per_delay */
-#define DEFAULT_SPINS_PER_DELAY  100
-
-extern void set_spins_per_delay(int shared_spins_per_delay);
-extern int	update_spins_per_delay(int shared_spins_per_delay);
-
-/*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
- */
-typedef struct
-{
-	int			spins;
-	int			delays;
-	int			cur_delay;
-	const char *file;
-	int			line;
-	const char *func;
-} SpinDelayStatus;
-
-static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
-{
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
-}
-
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
-
-#endif	 /* S_LOCK_H */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 3ae2a56d073..24edac4822d 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -14,9 +14,11 @@
  *		Acquire a spinlock, waiting if necessary.
  *		Time out and abort() if unable to acquire the lock in a
  *		"reasonable" amount of time --- typically ~ 1 minute.
+ *		Acquire (including read barrier) semantics.
  *
  *	void SpinLockRelease(volatile slock_t *lock)
  *		Unlock a previously acquired lock.
+ *		Release (including write barrier) semantics.
  *
  *	bool SpinLockFree(slock_t *lock)
  *		Tests if the lock is free. Returns true if free, false if locked.
@@ -35,11 +37,6 @@
  *	for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
  *	it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
  *
- *	These macros are implemented in terms of hardware-dependent macros
- *	supplied by s_lock.h.  There is not currently any extra functionality
- *	added by this header, but there has been in the past and may someday
- *	be again.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -51,15 +48,58 @@
 #ifndef SPIN_H
 #define SPIN_H
 
-#include "storage/s_lock.h"
+#ifdef FRONTEND
+#error "spin.h may not be included from frontend code"
+#endif
+
+#include "port/atomics.h"
+
+typedef pg_atomic_flag slock_t;
 
 
-#define SpinLockInit(lock)	S_INIT_LOCK(lock)
+/* Support for dynamic adjustment of spins_per_delay */
+#define DEFAULT_SPINS_PER_DELAY  100
+
+/*
+ * Support for spin delay which is useful in various places where
+ * spinlock-like procedures take place.
+ */
+typedef struct
+{
+	int			spins;
+	int			delays;
+	int			cur_delay;
+	const char *file;
+	int			line;
+	const char *func;
+} SpinDelayStatus;
+
+static inline void
+init_spin_delay(SpinDelayStatus *status,
+				const char *file, int line, const char *func)
+{
+	status->spins = 0;
+	status->delays = 0;
+	status->cur_delay = 0;
+	status->file = file;
+	status->line = line;
+	status->func = func;
+}
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(SpinDelayStatus *status);
+extern void finish_spin_delay(SpinDelayStatus *status);
+extern void set_spins_per_delay(int shared_spins_per_delay);
+extern int	update_spins_per_delay(int shared_spins_per_delay);
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
+/* Out-of-line part of spinlock acquisition. */
+extern int	s_lock(volatile slock_t *lock,
+				   const char *file, int line,
+				   const char *func);
 
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
+#define SpinLockInit(lock) pg_atomic_init_flag(lock)
+#define SpinLockAcquire(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
+#define SpinLockRelease(lock) pg_atomic_clear_flag(lock)
+#define SpinLockFree(lock) pg_atomic_unlocked_test_flag(lock)
 
 #endif							/* SPIN_H */
diff --git a/src/template/linux b/src/template/linux
index ec3302c4a22..2f04c1a6610 100644
--- a/src/template/linux
+++ b/src/template/linux
@@ -21,19 +21,4 @@ if test "$SUN_STUDIO_CC" = "yes" ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/template/solaris b/src/template/solaris
index f88b1cdad37..f5306b3dd5b 100644
--- a/src/template/solaris
+++ b/src/template/solaris
@@ -13,19 +13,4 @@ if test "$SUN_STUDIO_CC" = yes ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6e..0cc0bbe3b40 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -851,32 +851,9 @@ test_spinlock(void)
 		SpinLockAcquire(&struct_w_lock.lock);
 		SpinLockRelease(&struct_w_lock.lock);
 
-		/* test basic operations via underlying S_* API */
-		S_INIT_LOCK(&struct_w_lock.lock);
-		S_LOCK(&struct_w_lock.lock);
-		S_UNLOCK(&struct_w_lock.lock);
-
 		/* and that "contended" acquisition works */
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
-		S_UNLOCK(&struct_w_lock.lock);
-
-		/*
-		 * Check, using TAS directly, that a single spin cycle doesn't block
-		 * when acquiring an already acquired lock.
-		 */
-#ifdef TAS
-		S_LOCK(&struct_w_lock.lock);
-
-		if (!TAS(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-
-#ifdef TAS_SPIN
-		if (!TAS_SPIN(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-#endif							/* defined(TAS_SPIN) */
-
-		S_UNLOCK(&struct_w_lock.lock);
-#endif							/* defined(TAS) */
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/*
 		 * Verify that after all of this the non-lock contents are still
-- 
2.45.2

v2-0002-Add-some-assertions-to-spinlocks.patchtext/x-patch; charset=US-ASCII; name=v2-0002-Add-some-assertions-to-spinlocks.patchDownload
From e9f4795bcb0b85ef7d95bc16bb91b2b0ebeef131 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 1 Aug 2024 09:55:18 +1200
Subject: [PATCH v2 2/2] Add some assertions to spinlocks.

In assertion builds, use a magic value to check that the spinlock was
initialized, and also check that a lock was held when releasing.
---
 src/backend/storage/lmgr/s_lock.c |  4 +--
 src/include/storage/spin.h        | 46 ++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 18a98b6e638..5785082a720 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -116,11 +116,11 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 		 * On these architectures, it is known to be more efficient to test
 		 * the lock with a relaxed load first, while spinning.
 		 */
-		probably_free = pg_atomic_unlocked_test_flag(lock);
+		probably_free = pg_atomic_unlocked_test_flag(&lock->flag);
 #endif
 
 		/* Try to get the lock. */
-		if (probably_free && pg_atomic_test_set_flag(lock))
+		if (probably_free && pg_atomic_test_set_flag(&lock->flag))
 			break;
 
 		perform_spin_delay(&delayStatus);
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 24edac4822d..e9bf1023954 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -54,12 +54,19 @@
 
 #include "port/atomics.h"
 
-typedef pg_atomic_flag slock_t;
-
-
 /* Support for dynamic adjustment of spins_per_delay */
 #define DEFAULT_SPINS_PER_DELAY  100
 
+#define SLOCKT_T_MAGIC 0xc7f31f05
+
+typedef struct
+{
+	pg_atomic_flag flag;
+#ifdef USE_ASSERT_CHECKING
+	int			magic;
+#endif
+} slock_t;
+
 /*
  * Support for spin delay which is useful in various places where
  * spinlock-like procedures take place.
@@ -97,9 +104,34 @@ extern int	s_lock(volatile slock_t *lock,
 				   const char *file, int line,
 				   const char *func);
 
-#define SpinLockInit(lock) pg_atomic_init_flag(lock)
-#define SpinLockAcquire(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
-#define SpinLockRelease(lock) pg_atomic_clear_flag(lock)
-#define SpinLockFree(lock) pg_atomic_unlocked_test_flag(lock)
+static inline void
+SpinLockInit(volatile slock_t *lock)
+{
+#ifdef USE_ASSERT_CHECKING
+	/* Used to detect use of uninitialized spinlocks. */
+	lock->magic = SLOCKT_T_MAGIC;
+#endif
+
+	pg_atomic_init_flag(&lock->flag);
+}
+
+#define SpinLockAcquire(lock) \
+	(AssertMacro((lock)->magic == SLOCKT_T_MAGIC), \
+	 pg_atomic_test_set_flag(&(lock)->flag) ? 0 : \
+	 s_lock((lock), __FILE__, __LINE__, __func__))
+
+static inline void
+SpinLockRelease(volatile slock_t *lock)
+{
+	Assert(lock->magic == SLOCKT_T_MAGIC);
+
+	/*
+	 * Use a relaxed load to see that it's currently held.  That's OK because
+	 * we expect the calling thread to be the one that set it.
+	 */
+	Assert(!pg_atomic_unlocked_test_flag(&(lock)->flag));
+
+	pg_atomic_clear_flag(&(lock)->flag);
+}
 
 #endif							/* SPIN_H */
-- 
2.45.2

#34Andres Freund
andres@anarazel.de
In reply to: Thomas Munro (#33)
Re: Remove last traces of HPPA support

Hi,

On 2024-08-01 10:09:07 +1200, Thomas Munro wrote:

On Thu, Aug 1, 2024 at 7:07 AM Andres Freund <andres@anarazel.de> wrote:

Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
SpinLockRelease():
/messages/by-id/20240729182952.hua325647e2ggbsy@awork3.anarazel.de

What about adding a "magic" member in assertion builds? Here is my
attempt at that, in 0002.

That changes the ABI, which we don't want, because it breaks using
extensions against a differently built postgres.

I don't really see a reason to avoid having S_LOCK_FREE(), am I missing
something? Previously the semaphore fallback was a reason, but that's gone
now...

Greetings,

Andres Freund

#35Thomas Munro
thomas.munro@gmail.com
In reply to: Andres Freund (#34)
3 attachment(s)
Re: Remove last traces of HPPA support

On Thu, Aug 1, 2024 at 10:38 AM Andres Freund <andres@anarazel.de> wrote:

On 2024-08-01 10:09:07 +1200, Thomas Munro wrote:

On Thu, Aug 1, 2024 at 7:07 AM Andres Freund <andres@anarazel.de> wrote:

Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
SpinLockRelease():
/messages/by-id/20240729182952.hua325647e2ggbsy@awork3.anarazel.de

What about adding a "magic" member in assertion builds? Here is my
attempt at that, in 0002.

That changes the ABI, which we don't want, because it breaks using
extensions against a differently built postgres.

Yeah, right, bad idea. Let me think about how to do something like
what you showed, but with the atomics patch...

Hmm. One of the interesting things about the atomic_flag interface is
that it completely hides the contents of memory. (Guess: its weird
minimal interface was designed to help weird architectures like
PA-RISC, staying on topic for $SUBJECT; I doubt we'll see such a
system again but it's useful for this trick). So I guess we could
push the check down to that layer, and choose arbitrary non-zero
values for the arch-x86.h implementation of pg_atomic_flag . See
attached. Is this on the right track?

(Looking ahead, if we eventually move to using <stdatomic.h>, we won't
be able to use atomic_flag due to lack of relaxed load anyway, so we
could generalise this to atomic_char (rather than atomic_bool), and
keep using non-zero values. Presumably at that point we could also
decree that zero-initialised memory is valid for initialising our
spinlocks, but it seems useful as a defence against uninitialised
objects anyway.)

I don't really see a reason to avoid having S_LOCK_FREE(), am I missing
something? Previously the semaphore fallback was a reason, but that's gone
now...

Sure, but if it's just for assertions, we don't need it. Or any of
the S_XXX stuff.

Attachments:

v3-0001-Use-atomics-API-to-implement-spinlocks.patchtext/x-patch; charset=US-ASCII; name=v3-0001-Use-atomics-API-to-implement-spinlocks.patchDownload
From 8981ac9dcf0a76d4f75a1d2c71822579e1b18a93 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 31 Jul 2024 13:11:35 +1200
Subject: [PATCH v3 1/3] Use atomics API to implement spinlocks.

Since our spinlock API pre-dates our C11-style atomics API by decades,
it had its own hand-crafted operations written in assembler.  Use the
atomics API instead, to simplify and de-duplicate.  We couldn't have
done this earlier, because that'd be circular: atomics were simulated
with spinlocks in --disable-atomics builds.  Commit 81385261 removed
that option, so now we can delete most of the system-specific spinlock code
and just redirect everything to pg_atomic_flag.

The main special knowledge embodied in the hand-crafted code was the
relaxed load of the lock value before attempting to test-and-set, while
spinning.  That is retained in simplified form in the new coding.
---
 configure                              |  22 -
 configure.ac                           |  19 -
 src/Makefile.global.in                 |   3 -
 src/backend/port/Makefile              |  12 -
 src/backend/port/meson.build           |   2 +-
 src/backend/port/tas/dummy.s           |   0
 src/backend/port/tas/sunstudio_sparc.s |  53 --
 src/backend/port/tas/sunstudio_x86.s   |  43 --
 src/backend/storage/lmgr/s_lock.c      | 128 +----
 src/include/storage/s_lock.h           | 749 -------------------------
 src/include/storage/spin.h             |  70 ++-
 src/template/linux                     |  15 -
 src/template/solaris                   |  15 -
 src/test/regress/regress.c             |  25 +-
 14 files changed, 85 insertions(+), 1071 deletions(-)
 delete mode 100644 src/backend/port/tas/dummy.s
 delete mode 100644 src/backend/port/tas/sunstudio_sparc.s
 delete mode 100644 src/backend/port/tas/sunstudio_x86.s
 delete mode 100644 src/include/storage/s_lock.h

diff --git a/configure b/configure
index 8f684f7945e..e2267837b7d 100755
--- a/configure
+++ b/configure
@@ -731,7 +731,6 @@ PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
 DLSUFFIX
-TAS
 GCC
 CPP
 CFLAGS_SL
@@ -3021,12 +3020,6 @@ $as_echo "$template" >&6; }
 PORTNAME=$template
 
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -7770,20 +7763,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}"
-
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-
-
 
 cat >>confdefs.h <<_ACEOF
 #define DLSUFFIX "$DLSUFFIX"
@@ -19924,7 +19903,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;;
     "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;;
     "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;;
     "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;;
diff --git a/configure.ac b/configure.ac
index 75b73532fe0..59c3b7e3d35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,12 +95,6 @@ AC_MSG_RESULT([$template])
 PORTNAME=$template
 AC_SUBST(PORTNAME)
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -740,19 +734,6 @@ AC_PROG_CPP
 AC_SUBST(GCC)
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}])
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-AC_SUBST(TAS)
-
 AC_SUBST(DLSUFFIX)dnl
 AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"],
                    [Define to the file name extension of dynamically-loadable modules.])
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 83b91fe9167..0301f463027 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -771,9 +771,6 @@ ifeq ($(PORTNAME),win32)
 LIBS += -lws2_32
 endif
 
-# Not really standard libc functions, used by the backend.
-TAS         = @TAS@
-
 
 ##########################################################################
 #
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile
index 47338d99229..8613ac01aff 100644
--- a/src/backend/port/Makefile
+++ b/src/backend/port/Makefile
@@ -22,7 +22,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	$(TAS) \
 	atomics.o \
 	pg_sema.o \
 	pg_shmem.o
@@ -33,16 +32,5 @@ endif
 
 include $(top_srcdir)/src/backend/common.mk
 
-tas.o: tas.s
-ifeq ($(SUN_STUDIO_CC), yes)
-# preprocess assembler file with cpp
-	$(CC) $(CFLAGS) -c -P $<
-	mv $*.i $*_cpp.s
-	$(CC) $(CFLAGS) -c $*_cpp.s -o $@
-else
-	$(CC) $(CFLAGS) -c $<
-endif
-
 clean:
-	rm -f tas_cpp.s
 	$(MAKE) -C win32 clean
diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build
index 7820e86016d..3270ffb7030 100644
--- a/src/backend/port/meson.build
+++ b/src/backend/port/meson.build
@@ -30,4 +30,4 @@ if host_system == 'windows'
 endif
 
 # autoconf generates the file there, ensure we get a conflict
-generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c', 'tas.s']}
+generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c']}
diff --git a/src/backend/port/tas/dummy.s b/src/backend/port/tas/dummy.s
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s
deleted file mode 100644
index 3400713afd5..00000000000
--- a/src/backend/port/tas/sunstudio_sparc.s
+++ /dev/null
@@ -1,53 +0,0 @@
-!-------------------------------------------------------------------------
-!
-! sunstudio_sparc.s
-!	  compare and swap for Sun Studio on Sparc
-!
-! Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-! Portions Copyright (c) 1994, Regents of the University of California
-!
-! IDENTIFICATION
-!	  src/backend/port/tas/sunstudio_sparc.s
-!
-!-------------------------------------------------------------------------
-
-! Fortunately the Sun compiler can process cpp conditionals with -P
-
-! '/' is the comment for x86, while '!' is the comment for Sparc
-
-#if defined(__sparcv9) || defined(__sparc)
-
-	.section        ".text"
-	.align  8
-	.skip   24
-	.align  4
-
-	.global pg_atomic_cas
-pg_atomic_cas:
-
-	! "cas" only works on sparcv9 and sparcv8plus chips, and
-	! requires a compiler targeting these CPUs.  It will fail
-	! on a compiler targeting sparcv8, and of course will not
-	! be understood by a sparcv8 CPU.  gcc continues to use
-	! "ldstub" because it targets sparcv7.
-	!
-	! There is actually a trick for embedding "cas" in a
-	! sparcv8-targeted compiler, but it can only be run
-	! on a sparcv8plus/v9 cpus:
-	!
-	!   http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il
-	!
-	! NB: We're assuming we're running on a TSO system here - solaris
-	! userland luckily always has done so.
-
-#if defined(__sparcv9) || defined(__sparcv8plus)
-	cas     [%o0],%o2,%o1
-#else
-	ldstub [%o0],%o1
-#endif
-	mov     %o1,%o0
-	retl
-	nop
-	.type   pg_atomic_cas,2
-	.size   pg_atomic_cas,(.-pg_atomic_cas)
-#endif
diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s
deleted file mode 100644
index b4608a9ceb2..00000000000
--- a/src/backend/port/tas/sunstudio_x86.s
+++ /dev/null
@@ -1,43 +0,0 @@
-/-------------------------------------------------------------------------
-/
-/ sunstudio_x86.s
-/	  compare and swap for Sun Studio on x86
-/
-/ Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-/ Portions Copyright (c) 1994, Regents of the University of California
-/
-/ IDENTIFICATION
-/	  src/backend/port/tas/sunstudio_x86.s
-/
-/-------------------------------------------------------------------------
-
-/ Fortunately the Sun compiler can process cpp conditionals with -P
-
-/ '/' is the comment for x86, while '!' is the comment for Sparc
-
-	.file   "tas.s"
-
-#if defined(__amd64)
-	.code64
-#endif
-
-	.globl pg_atomic_cas
-	.type pg_atomic_cas, @function
-
-	.section .text, "ax"
-	.align 16
-
-pg_atomic_cas:
-#if defined(__amd64)
-	movl       %edx,%eax
-	lock
-	cmpxchgl   %esi,(%rdi)
-#else
-	movl    4(%esp), %edx
-	movl    8(%esp), %ecx
-	movl    12(%esp), %eax
-	lock
-	cmpxchgl %ecx, (%edx)
-#endif
-	ret
-	.size pg_atomic_cas, . - pg_atomic_cas
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65dba..18a98b6e638 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -52,7 +52,7 @@
 
 #include "common/pg_prng.h"
 #include "port/atomics.h"
-#include "storage/s_lock.h"
+#include "storage/spin.h"
 #include "utils/wait_event.h"
 
 #define MIN_SPINS_PER_DELAY 10
@@ -93,7 +93,7 @@ s_lock_stuck(const char *file, int line, const char *func)
 }
 
 /*
- * s_lock(lock) - platform-independent portion of waiting for a spinlock.
+ * s_lock(lock) - out-of-line portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
@@ -102,8 +102,27 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 
 	init_spin_delay(&delayStatus, file, line, func);
 
-	while (TAS_SPIN(lock))
+	for (;;)
 	{
+		bool		probably_free = true;
+
+#if defined(__i386__) || defined(__x86_64__) || \
+	defined(_M_IX86) || defined(_M_AMD64) || \
+	defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__) \
+
+
+		/*
+		 * On these architectures, it is known to be more efficient to test
+		 * the lock with a relaxed load first, while spinning.
+		 */
+		probably_free = pg_atomic_unlocked_test_flag(lock);
+#endif
+
+		/* Try to get the lock. */
+		if (probably_free && pg_atomic_test_set_flag(lock))
+			break;
+
 		perform_spin_delay(&delayStatus);
 	}
 
@@ -112,14 +131,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 	return delayStatus.delays;
 }
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-	*lock = 0;
-}
-#endif
-
 /*
  * Wait while spinning on a contended spinlock.
  */
@@ -127,7 +138,7 @@ void
 perform_spin_delay(SpinDelayStatus *status)
 {
 	/* CPU-specific delay each time through the loop */
-	SPIN_DELAY();
+	pg_spin_delay();
 
 	/* Block the process every spins_per_delay tries */
 	if (++(status->spins) >= spins_per_delay)
@@ -230,96 +241,3 @@ update_spins_per_delay(int shared_spins_per_delay)
 	 */
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
-
-
-/*****************************************************************************/
-#if defined(S_LOCK_TEST)
-
-/*
- * test program for verifying a port's spinlock support.
- */
-
-struct test_lock_struct
-{
-	char		pad1;
-	slock_t		lock;
-	char		pad2;
-};
-
-volatile struct test_lock_struct test_lock;
-
-int
-main()
-{
-	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
-
-	test_lock.pad1 = test_lock.pad2 = 0x44;
-
-	S_INIT_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not initialized\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not locked\n");
-		return 1;
-	}
-
-	S_UNLOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not unlocked\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not re-locked\n");
-		return 1;
-	}
-
-	printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
-	printf("             exit with a 'stuck spinlock' message\n");
-	printf("             if S_LOCK() and TAS() are working.\n");
-	fflush(stdout);
-
-	s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
-
-	printf("S_LOCK_TEST: failed, lock not locked\n");
-	return 1;
-}
-
-#endif							/* S_LOCK_TEST */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
deleted file mode 100644
index e94ed5f48bd..00000000000
--- a/src/include/storage/s_lock.h
+++ /dev/null
@@ -1,749 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * s_lock.h
- *	   Implementation of spinlocks.
- *
- *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the macros in spin.h.
- *
- *	The following hardware-dependent macros must be provided for each
- *	supported platform:
- *
- *	void S_INIT_LOCK(slock_t *lock)
- *		Initialize a spinlock (to the unlocked state).
- *
- *	int S_LOCK(slock_t *lock)
- *		Acquire a spinlock, waiting if necessary.
- *		Time out and abort() if unable to acquire the lock in a
- *		"reasonable" amount of time --- typically ~ 1 minute.
- *		Should return number of "delays"; see s_lock.c
- *
- *	void S_UNLOCK(slock_t *lock)
- *		Unlock a previously acquired lock.
- *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
- *	void SPIN_DELAY(void)
- *		Delay operation to occur inside spinlock wait loop.
- *
- *	Note to implementors: there are default implementations for all these
- *	macros at the bottom of the file.  Check if your platform can use
- *	these or needs to override them.
- *
- *  Usually, S_LOCK() is implemented in terms of even lower-level macros
- *	TAS() and TAS_SPIN():
- *
- *	int TAS(slock_t *lock)
- *		Atomic test-and-set instruction.  Attempt to acquire the lock,
- *		but do *not* wait.	Returns 0 if successful, nonzero if unable
- *		to acquire the lock.
- *
- *	int TAS_SPIN(slock_t *lock)
- *		Like TAS(), but this version is used when waiting for a lock
- *		previously found to be contended.  By default, this is the
- *		same as TAS(), but on some architectures it's better to poll a
- *		contended lock using an unlocked instruction and retry the
- *		atomic test-and-set only when it appears free.
- *
- *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *	directly.
- *
- *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *	failure to acquire a lock even when the lock is not locked.  For example,
- *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *	always be used, even if you are certain the lock is free.
- *
- *	It is the responsibility of these macros to make sure that the compiler
- *	does not re-order accesses to shared memory to precede the actual lock
- *	acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *	was the caller's responsibility, which meant that callers had to use
- *	volatile-qualified pointers to refer to both the spinlock itself and the
- *	shared data being accessed within the spinlocked critical section.  This
- *	was notationally awkward, easy to forget (and thus error-prone), and
- *	prevented some useful compiler optimizations.  For these reasons, we
- *	now require that the macros themselves prevent compiler re-ordering,
- *	so that the caller doesn't need to take special precautions.
- *
- *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- *	S_UNLOCK() macros must further include hardware-level memory fence
- *	instructions to prevent similar re-ordering at the hardware level.
- *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- *	the macro are not executed until the lock has been obtained.  Conversely,
- *	S_UNLOCK() must guarantee that loads and stores issued before the macro
- *	have been executed before the lock is released.
- *
- *	On most supported platforms, TAS() uses a tas() function written
- *	in assembly language to execute a hardware atomic-test-and-set
- *	instruction.  Equivalent OS-supplied mutex routines could be used too.
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *	  src/include/storage/s_lock.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef S_LOCK_H
-#define S_LOCK_H
-
-#ifdef FRONTEND
-#error "s_lock.h may not be included from frontend code"
-#endif
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-/*
- * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- *
- * We use the int-width variant of the builtin because it works on more chips
- * than other widths.
- */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
-#ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
-#if defined(__aarch64__)
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	__asm__ __volatile__(
-		" isb;				\n");
-}
-
-#endif	 /* __aarch64__ */
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-/*
- * Solaris has always run sparc processors in TSO (total store) mode, but
- * linux didn't use to and the *BSDs still don't. So, be careful about
- * acquire/release semantics. The CPU will treat superfluous members as
- * NOPs, so it's just code space.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res;
-
-	/*
-	 *	See comment in src/backend/port/tas/sunstudio_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-#if defined(__sparcv7) || defined(__sparc_v7__)
-	/*
-	 * No stbar or membar available, luckily no actually produced hardware
-	 * requires a barrier.
-	 */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-	/* stbar is available (and required for both PSO, RMO), membar isn't */
-	__asm__ __volatile__ ("stbar	 \n":::"memory");
-#else
-	/*
-	 * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
-	 * barrier for sparcv8+ upwards.
-	 */
-	__asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
-#endif
-	return (int) _res;
-}
-
-#if defined(__sparcv7) || defined(__sparc_v7__)
-/*
- * No stbar or membar available, luckily no actually produced hardware
- * requires a barrier.  We fall through to the default gcc definition of
- * S_UNLOCK in this case.
- */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-/* stbar is available (and required for both PSO, RMO), membar isn't */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("stbar	 \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-/*
- * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
- * release barrier for sparcv8+ upwards.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * The second operand of addi can hold a constant zero or a register number,
- * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
- * base register"; most operands having this register-or-zero property are
- * address bases, e.g. the second operand of lwax.
- *
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-"	lwarx   %0,0,%3,1	\n"
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f			\n"
-"1: \n"
-"	li      %1,1		\n"
-"	b       3f			\n"
-"2: \n"
-"	lwsync				\n"
-"	li      %1,0		\n"
-"3: \n"
-:	"=&b"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* powerpc */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * Original MIPS-I processors lacked the LL/SC instructions, but if we are
- * so unfortunate as to be running on one of those, we expect that the kernel
- * will handle the illegal-instruction traps and emulate them for us.  On
- * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
- * choice because any other synchronization method must involve a kernel
- * call.  Unfortunately, many toolchains still default to MIPS-I as the
- * codegen target; if the symbol __mips shows that that's the case, we
- * have to force the assembler to accept LL/SC.
- *
- * R10000 and up processors require a separate SYNC, which has the same
- * issues as LL/SC.
- */
-#if __mips < 2
-#define MIPS_SET_MIPS2	"       .set mips2          \n"
-#else
-#define MIPS_SET_MIPS2
-#endif
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile slock_t *_l = lock;
-	int			_res;
-	int			_tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		MIPS_SET_MIPS2
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:		/* no inputs */
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		MIPS_SET_MIPS2 \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              " \
-:		/* no outputs */ \
-:		/* no inputs */	\
-:		"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-
-/*
- * If we have no platform-specific knowledge, but we found that the compiler
- * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
- * version over the char-width version if we have both, on the rather dubious
- * grounds that that's known to be more likely to work in the ARM ecosystem.
- * (But we dealt with ARM above.)
- */
-#if !defined(HAS_TEST_AND_SET)
-
-#if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/*
- * Default implementation of S_UNLOCK() for gcc/icc.
- *
- * Note that this implementation is unsafe for any platform that can reorder
- * a memory access (either load or store) after a following store.  That
- * happens not to be possible on x86 and most legacy architectures (some are
- * single-processor!), but many modern systems have weaker memory ordering.
- * Those that do must define their own version of S_UNLOCK() rather than
- * relying on this one.
- */
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
-#endif
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-/* These are in sunstudio_(sparc|x86).s */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef _MSC_VER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-
-#define S_UNLOCK(lock)	\
-	do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-bugs@lists.postgresql.org.
-#endif
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
-#define S_LOCK(lock) \
-	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-/*
- * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
- * is unsafe if the platform can reorder a memory access (either load or
- * store) after a following store; platforms where this is possible must
- * define their own S_UNLOCK.  But CPU reordering is not the only concern:
- * if we simply defined S_UNLOCK() as an inline macro, the compiler might
- * reorder instructions from inside the critical section to occur after the
- * lock release.  Since the compiler probably can't know what the external
- * function s_unlock is doing, putting the same logic there should be adequate.
- * A sufficiently-smart globally optimizing compiler could break that
- * assumption, though, and the cost of a function call for every spinlock
- * release may hurt performance significantly, so we use this implementation
- * only for platforms where we don't know of a suitable intrinsic.  For the
- * most part, those are relatively obscure platform/compiler combinations to
- * which the PostgreSQL project does not have access.
- */
-#define USE_DEFAULT_S_UNLOCK
-extern void s_unlock(volatile slock_t *lock);
-#define S_UNLOCK(lock)		s_unlock(lock)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
-
-/*
- * Platform-independent out-of-line support routines
- */
-extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
-
-/* Support for dynamic adjustment of spins_per_delay */
-#define DEFAULT_SPINS_PER_DELAY  100
-
-extern void set_spins_per_delay(int shared_spins_per_delay);
-extern int	update_spins_per_delay(int shared_spins_per_delay);
-
-/*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
- */
-typedef struct
-{
-	int			spins;
-	int			delays;
-	int			cur_delay;
-	const char *file;
-	int			line;
-	const char *func;
-} SpinDelayStatus;
-
-static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
-{
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
-}
-
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
-
-#endif	 /* S_LOCK_H */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 3ae2a56d073..326a2711f23 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -14,9 +14,11 @@
  *		Acquire a spinlock, waiting if necessary.
  *		Time out and abort() if unable to acquire the lock in a
  *		"reasonable" amount of time --- typically ~ 1 minute.
+ *		Acquire (including read barrier) semantics.
  *
  *	void SpinLockRelease(volatile slock_t *lock)
  *		Unlock a previously acquired lock.
+ *		Release (including write barrier) semantics.
  *
  *	bool SpinLockFree(slock_t *lock)
  *		Tests if the lock is free. Returns true if free, false if locked.
@@ -35,11 +37,6 @@
  *	for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
  *	it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
  *
- *	These macros are implemented in terms of hardware-dependent macros
- *	supplied by s_lock.h.  There is not currently any extra functionality
- *	added by this header, but there has been in the past and may someday
- *	be again.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -51,15 +48,68 @@
 #ifndef SPIN_H
 #define SPIN_H
 
-#include "storage/s_lock.h"
+#ifdef FRONTEND
+#error "spin.h may not be included from frontend code"
+#endif
+
+#include "port/atomics.h"
+
+/* Support for dynamic adjustment of spins_per_delay */
+#define DEFAULT_SPINS_PER_DELAY  100
+
+typedef pg_atomic_flag slock_t;
+
+/*
+ * Support for spin delay which is useful in various places where
+ * spinlock-like procedures take place.
+ */
+typedef struct
+{
+	int			spins;
+	int			delays;
+	int			cur_delay;
+	const char *file;
+	int			line;
+	const char *func;
+} SpinDelayStatus;
+
+static inline void
+init_spin_delay(SpinDelayStatus *status,
+				const char *file, int line, const char *func)
+{
+	status->spins = 0;
+	status->delays = 0;
+	status->cur_delay = 0;
+	status->file = file;
+	status->line = line;
+	status->func = func;
+}
 
+#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(SpinDelayStatus *status);
+extern void finish_spin_delay(SpinDelayStatus *status);
+extern void set_spins_per_delay(int shared_spins_per_delay);
+extern int	update_spins_per_delay(int shared_spins_per_delay);
 
-#define SpinLockInit(lock)	S_INIT_LOCK(lock)
+/* Out-of-line part of spinlock acquisition. */
+extern int	s_lock(volatile slock_t *lock,
+				   const char *file, int line,
+				   const char *func);
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+static inline void
+SpinLockInit(volatile slock_t *lock)
+{
+	pg_atomic_init_flag(lock);
+}
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
+#define SpinLockAcquire(lock)						\
+	(pg_atomic_test_set_flag(lock) ? 0 :			\
+	 s_lock((lock), __FILE__, __LINE__, __func__))
 
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
+static inline void
+SpinLockRelease(volatile slock_t *lock)
+{
+	pg_atomic_clear_flag(lock);
+}
 
 #endif							/* SPIN_H */
diff --git a/src/template/linux b/src/template/linux
index ec3302c4a22..2f04c1a6610 100644
--- a/src/template/linux
+++ b/src/template/linux
@@ -21,19 +21,4 @@ if test "$SUN_STUDIO_CC" = "yes" ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/template/solaris b/src/template/solaris
index f88b1cdad37..f5306b3dd5b 100644
--- a/src/template/solaris
+++ b/src/template/solaris
@@ -13,19 +13,4 @@ if test "$SUN_STUDIO_CC" = yes ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6e..0cc0bbe3b40 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -851,32 +851,9 @@ test_spinlock(void)
 		SpinLockAcquire(&struct_w_lock.lock);
 		SpinLockRelease(&struct_w_lock.lock);
 
-		/* test basic operations via underlying S_* API */
-		S_INIT_LOCK(&struct_w_lock.lock);
-		S_LOCK(&struct_w_lock.lock);
-		S_UNLOCK(&struct_w_lock.lock);
-
 		/* and that "contended" acquisition works */
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
-		S_UNLOCK(&struct_w_lock.lock);
-
-		/*
-		 * Check, using TAS directly, that a single spin cycle doesn't block
-		 * when acquiring an already acquired lock.
-		 */
-#ifdef TAS
-		S_LOCK(&struct_w_lock.lock);
-
-		if (!TAS(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-
-#ifdef TAS_SPIN
-		if (!TAS_SPIN(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-#endif							/* defined(TAS_SPIN) */
-
-		S_UNLOCK(&struct_w_lock.lock);
-#endif							/* defined(TAS) */
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/*
 		 * Verify that after all of this the non-lock contents are still
-- 
2.45.2

v3-0002-Assert-that-spinlocks-are-not-double-released.patchtext/x-patch; charset=US-ASCII; name=v3-0002-Assert-that-spinlocks-are-not-double-released.patchDownload
From 3354ee5fd668b00b47920faffe454542a365932f Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 1 Aug 2024 11:00:20 +1200
Subject: [PATCH v3 2/3] Assert that spinlocks are not double-released.

---
 src/include/storage/spin.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 326a2711f23..9414c111db8 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -109,6 +109,12 @@ SpinLockInit(volatile slock_t *lock)
 static inline void
 SpinLockRelease(volatile slock_t *lock)
 {
+	/*
+	 * Use a relaxed load to see that it's currently held.  That's OK because
+	 * we expect the calling thread to be the one that set it.
+	 */
+	Assert(!pg_atomic_unlocked_test_flag(lock));
+
 	pg_atomic_clear_flag(lock);
 }
 
-- 
2.45.2

v3-0003-Assert-that-pg_atomic_flag-is-initialized.patchtext/x-patch; charset=US-ASCII; name=v3-0003-Assert-that-pg_atomic_flag-is-initialized.patchDownload
From 8cf5d1088b22d0261423fe9e9060e6f9066c8fb5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 1 Aug 2024 11:09:44 +1200
Subject: [PATCH v3 3/3] Assert that pg_atomic_flag is initialized.

On x86, use 1 and 2 as the clear and set values for pg_atomic_flag.
This way we can add a useful assertion that spinlocks, built on top of
pg_atomic_flag, have been initialized before use and are not relying on
zeroed memory.
---
 src/include/port/atomics/arch-x86.h | 48 +++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index c12f8a60697..b0c4ae566a0 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -130,11 +130,41 @@ pg_spin_delay_impl(void)
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
+/*
+ * Note: This implementation uses non-zero values to help detect uninitialized
+ * usage on a common platform.  Note that, like the C11 atomic_flag interface
+ * that it is modeled on, pg_atomic_flag never reveals the actual values to
+ * callers and contents of memory are not specified.
+ */
+#define PG_ATOMIC_FLAG_SET 1
+#define PG_ATOMIC_FLAG_CLEAR 2
+
+static inline void
+pg_atomic_check_flag(volatile pg_atomic_flag *ptr)
+{
+#if USE_ASSERT_CHECKING
+	char		value = ptr->value;
+
+	/* Sanity check that flag has been initialized. */
+	Assert(value == PG_ATOMIC_FLAG_SET || value == PG_ATOMIC_FLAG_CLEAR);
+#endif
+}
+
+#define PG_HAVE_ATOMIC_INIT_FLAG
+static inline void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	ptr->value = PG_ATOMIC_FLAG_CLEAR;
+	__asm__ __volatile__("" ::: "memory");
+}
+
 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
 static inline bool
 pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
 {
-	char		_res = 1;
+	char		_res = PG_ATOMIC_FLAG_SET;
+
+	pg_atomic_check_flag(ptr);
 
 	__asm__ __volatile__(
 		"	lock			\n"
@@ -142,19 +172,31 @@ pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
 :		"+q"(_res), "+m"(ptr->value)
 :
 :		"memory");
-	return _res == 0;
+	return _res == PG_ATOMIC_FLAG_CLEAR;
 }
 
 #define PG_HAVE_ATOMIC_CLEAR_FLAG
 static inline void
 pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
 {
+	pg_atomic_check_flag(ptr);
+
 	/*
 	 * On a TSO architecture like x86 it's sufficient to use a compiler
 	 * barrier to achieve release semantics.
 	 */
 	__asm__ __volatile__("" ::: "memory");
-	ptr->value = 0;
+	ptr->value = PG_ATOMIC_FLAG_CLEAR;
+}
+
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+static inline bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_check_flag(ptr);
+
+	__asm__ __volatile__("" ::: "memory");
+	return ptr->value == PG_ATOMIC_FLAG_CLEAR;
 }
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-- 
2.45.2

#36Thomas Munro
thomas.munro@gmail.com
In reply to: Thomas Munro (#25)
1 attachment(s)
Re: Remove last traces of HPPA support

On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <thomas.munro@gmail.com> wrote:

On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinnaka@iki.fi> wrote:

I think we should do:

#ifdef _M_AMD64
#define __x86_64__
#endif

somewhere, perhaps in src/include/port/win32.h.

I suppose we could define our own
PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64} in one central
place, instead. Draft patch for illustration.

Attachments:

0001-Standardize-macros-for-detecting-architectures.patchtext/x-patch; charset=US-ASCII; name=0001-Standardize-macros-for-detecting-architectures.patchDownload
From 4df24c6fe7370cdeacd4e794f4ccc6202a909e62 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 1 Aug 2024 16:38:05 +1200
Subject: [PATCH] Standardize macros for detecting architectures.

Instead of repeating multiple compilers' architecture macros throughout
the tree, detect them in one central place, and define our own macros of
the form:

  PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64}

This fixes the problem that MSVC builds were unintentionally using
suboptimal fallback code defined by "port/atomics.h", due to
inconsistent testing for macros.  A couple of other places were also
affected.

XXX This patch doesn't adjust s_lock.h, because it's complicated, full
of old dead sub-architectures, and a nearby patch proposes to delete
it...

Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com
---
 contrib/pgcrypto/crypt-blowfish.c   |  4 ++--
 src/include/c.h                     | 33 +++++++++++++++++++++++++++++
 src/include/port/atomics.h          |  6 +++---
 src/include/port/atomics/arch-x86.h | 16 +++++++-------
 src/include/port/pg_bitutils.h      |  6 +++---
 src/include/port/simd.h             |  2 +-
 src/include/storage/s_lock.h        |  2 +-
 src/port/pg_crc32c_sse42.c          |  4 ++--
 8 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c
index 5a1b1e10091..9c4e02e428b 100644
--- a/contrib/pgcrypto/crypt-blowfish.c
+++ b/contrib/pgcrypto/crypt-blowfish.c
@@ -38,10 +38,10 @@
 #include "px-crypt.h"
 #include "px.h"
 
-#ifdef __i386__
+#if defined(PG_ARCH_X86_32)
 #define BF_ASM				0	/* 1 */
 #define BF_SCALE			1
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
 #define BF_ASM				0
 #define BF_SCALE			1
 #else
diff --git a/src/include/c.h b/src/include/c.h
index dc1841346cd..542cbd33fad 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -425,6 +425,39 @@ typedef void (*pg_funcptr_t) (void);
 #define HAVE_PRAGMA_GCC_SYSTEM_HEADER	1
 #endif
 
+/*
+ * Project-standardized name for CPU architectures, to avoid having to repeat
+ * the names that different compilers use.
+ */
+#if defined(__arm__) || defined(__arm)
+#define PG_ARCH_ARM_32
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define PG_ARCH_ARM_64
+#elif defined(__mips__)
+#define PG_ARCH_MIPS_32
+#elif defined(__mips64__)
+#define PG_ARCH_MIPS_64
+#elif defined(__ppc__) || defined(__powerpc__)
+#define PG_ARCH_POWER_32
+#elif defined(__ppc64__) || defined(__powerpc64__)
+#define PG_ARCH_POWER_64
+#elif defined(__riscv__)
+#define PG_ARCH_RISCV_32
+#elif defined(__riscv64__)
+#define PG_ARCH_RISCV_64
+#elif defined(__s390__)
+#define PG_ARCH_S390_32
+#elif defined(__s390x__)
+#define PG_ARCH_S390_64
+#elif defined(__sparc)
+#define PG_ARCH_SPARC_32
+#elif defined(__sparcv9)
+#define PG_ARCH_SPARC_64
+#elif defined(__i386__) || defined (__i386) || defined(_M_IX86)
+#define PG_ARCH_X86_32
+#elif defined(__x86_64__) || defined(__x86_64) || defined (__amd64)
+#define PG_ARCH_X86_64
+#endif
 
 /* ----------------------------------------------------------------
  *				Section 2:	bool, true, false
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index c0c8688f736..3300ea54c17 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -63,11 +63,11 @@
  * compiler barrier.
  *
  */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
+#if defined(PG_ARCH_ARM_32) || defined(PG_ARCH_ARM_64)
 #include "port/atomics/arch-arm.h"
-#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#elif defined(PG_ARCH_X86_32) || defined(PG_ARCH_X86_64)
 #include "port/atomics/arch-x86.h"
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#elif defined(PG_ARCH_POWER_32) || defined (PG_ARCH_POWER_64)
 #include "port/atomics/arch-ppc.h"
 #endif
 
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index c12f8a60697..9f20edf221f 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -32,10 +32,10 @@
  */
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#if defined(__i386__) || defined(__i386)
+#if defined(PG_ARCH_X86_32)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
 #endif
@@ -67,14 +67,14 @@ typedef struct pg_atomic_uint32
  * It's too complicated to write inline asm for 64bit types on 32bit and the
  * 486 can't do it anyway.
  */
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 #define PG_HAVE_ATOMIC_U64_SUPPORT
 typedef struct pg_atomic_uint64
 {
 	/* alignment guaranteed due to being on a 64bit platform */
 	volatile uint64 value;
 } pg_atomic_uint64;
-#endif	/* __x86_64__ */
+#endif	/* PG_ARCH_X86_64 */
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
@@ -109,7 +109,7 @@ pg_spin_delay_impl(void)
 {
 	__asm__ __volatile__(" rep; nop			\n");
 }
-#elif defined(_MSC_VER) && defined(__x86_64__)
+#elif defined(_MSC_VER) && defined(PG_ARCH_X86_64)
 #define PG_HAVE_SPIN_DELAY
 static __forceinline void
 pg_spin_delay_impl(void)
@@ -192,7 +192,7 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 	return res;
 }
 
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
 static inline bool
@@ -231,7 +231,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 	return res;
 }
 
-#endif /* __x86_64__ */
+#endif /* PG_ARCH_X86_64 */
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
@@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
  */
 #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */  \
 	(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
-	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
+	defined(PG_ARCH_X86_64)
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
 #endif /* 8 byte single-copy atomicity */
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 4d88478c9c2..39a756b7638 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word)
 #error must have a working 64-bit integer datatype
 #endif							/* HAVE_LONG_INT_64 */
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
 	unsigned long result;
 	bool		non_zero;
 
@@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word)
 #error must have a working 64-bit integer datatype
 #endif							/* HAVE_LONG_INT_64 */
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
 	unsigned long result;
 	bool		non_zero;
 
@@ -282,7 +282,7 @@ pg_ceil_log2_64(uint64 num)
  * __builtin_popcount* intrinsic functions as they always emit popcnt
  * instructions.
  */
-#if defined(_MSC_VER) && defined(_M_AMD64)
+#if defined(_MSC_VER) && defined(PG_ARCH_X86_64)
 #define HAVE_X86_64_POPCNTQ
 #endif
 
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 597496f2fb7..8c3555707e1 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -18,7 +18,7 @@
 #ifndef SIMD_H
 #define SIMD_H
 
-#if (defined(__x86_64__) || defined(_M_AMD64))
+#ifdef PG_ARCH_X86_64
 /*
  * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
  * that compilers targeting this architecture understand SSE2 intrinsics.
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index e94ed5f48bd..3b9749e6a4b 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -385,7 +385,7 @@ do \
 
 
 /* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#if defined(PG_ARCH_POWER_32) || defined(PG_ARCH_POWER_64)
 #define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 7f88c114800..30c008ce4f4 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -32,7 +32,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 	 * and performance testing didn't show any performance gain from aligning
 	 * the begin address.
 	 */
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 	while (p + 8 <= pend)
 	{
 		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
@@ -56,7 +56,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
 		p += 4;
 	}
-#endif							/* __x86_64__ */
+#endif							/* PG_ARCH_X86_64 */
 
 	/* Process any remaining bytes one at a time. */
 	while (p < pend)
-- 
2.45.2