From ea80fc0c988b1d51816b2fd4b9d2bba2e7aead05 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 6 Nov 2013 10:32:53 +0100
Subject: [PATCH 2/4] Very basic atomic ops implementation

Only gcc has been tested; stub implementations for
* msvc
* HP-UX acc
* IBM xlc
* Sun/Oracle SunPro compiler
are included.

All implementations only provide the bare minimum of operations for
now and rely on emulating more complex operations via
compare_and_swap().

Most of s_lock.h is removed and replaced by use of the atomic ops.
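
To illustrate the emulation approach: the generic fallbacks in
atomics-generic.h derive read-modify-write operations from
compare_and_swap() with a retry loop, roughly like this (simplified
sketch, not the literal code; the helper name is made up):

    static uint32
    fetch_add_via_cas(volatile pg_atomic_uint32 *ptr, uint32 add_)
    {
        uint32 old;

        while (true)
        {
            /* re-read the current value on every iteration */
            old = pg_atomic_read_u32_impl(ptr);
            /* succeeds iff nobody modified *ptr concurrently */
            if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
                break;
        }
        return old;     /* value before the addition */
    }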
---
 config/c-compiler.m4                             |  77 ++
 configure.in                                     |  19 +-
 src/backend/storage/lmgr/Makefile                |   2 +-
 src/backend/storage/lmgr/atomics.c               |  26 +
 src/backend/storage/lmgr/spin.c                  |   8 -
 src/include/c.h                                  |   7 +
 src/include/storage/atomics-arch-alpha.h         |  25 +
 src/include/storage/atomics-arch-amd64.h         |  56 ++
 src/include/storage/atomics-arch-arm.h           |  29 +
 src/include/storage/atomics-arch-hppa.h          |  17 +
 src/include/storage/atomics-arch-i386.h          |  96 +++
 src/include/storage/atomics-arch-ia64.h          |  27 +
 src/include/storage/atomics-arch-ppc.h           |  25 +
 src/include/storage/atomics-generic-acc.h        |  99 +++
 src/include/storage/atomics-generic-gcc.h        | 174 +++++
 src/include/storage/atomics-generic-msvc.h       |  67 ++
 src/include/storage/atomics-generic-sunpro.h     |  66 ++
 src/include/storage/atomics-generic-xlc.h        |  84 +++
 src/include/storage/atomics-generic.h            | 402 +++++++++++
 src/include/storage/atomics.h                    | 543 ++++++++++++++
 src/include/storage/barrier.h                    | 137 +---
 src/include/storage/s_lock.h                     | 880 +----------------------
 src/test/regress/expected/lock.out               |   8 +
 src/test/regress/input/create_function_1.source  |   5 +
 src/test/regress/output/create_function_1.source |   4 +
 src/test/regress/regress.c                       | 213 ++++++
 src/test/regress/sql/lock.sql                    |   5 +
 27 files changed, 2078 insertions(+), 1023 deletions(-)
 create mode 100644 src/backend/storage/lmgr/atomics.c
 create mode 100644 src/include/storage/atomics-arch-alpha.h
 create mode 100644 src/include/storage/atomics-arch-amd64.h
 create mode 100644 src/include/storage/atomics-arch-arm.h
 create mode 100644 src/include/storage/atomics-arch-hppa.h
 create mode 100644 src/include/storage/atomics-arch-i386.h
 create mode 100644 src/include/storage/atomics-arch-ia64.h
 create mode 100644 src/include/storage/atomics-arch-ppc.h
 create mode 100644 src/include/storage/atomics-generic-acc.h
 create mode 100644 src/include/storage/atomics-generic-gcc.h
 create mode 100644 src/include/storage/atomics-generic-msvc.h
 create mode 100644 src/include/storage/atomics-generic-sunpro.h
 create mode 100644 src/include/storage/atomics-generic-xlc.h
 create mode 100644 src/include/storage/atomics-generic.h
 create mode 100644 src/include/storage/atomics.h

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 4ba3236..c24cb59 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -289,3 +289,80 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_PROG_CC_LDFLAGS_OPT
+
+AC_DEFUN([PGAC_C_BUILTIN_CONSTANT_P],
+[AC_CACHE_CHECK(for __builtin_constant_p, pgac_cv__builtin_constant_p,
+[AC_TRY_COMPILE([static int x; static int y[__builtin_constant_p(x) ? x : 1];],
+[],
+[pgac_cv__builtin_constant_p=yes],
+[pgac_cv__builtin_constant_p=no])])
+if test x"$pgac_cv__builtin_constant_p" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CONSTANT_P, 1,
+          [Define to 1 if your compiler understands __builtin_constant_p.])
+fi])# PGAC_C_BUILTIN_CONSTANT_P
+
+# PGAC_HAVE_GCC__SYNC_INT32_TAS
+# -------------------------
+# Check if the C compiler understands __sync_lock_test_and_set(),
+# and define HAVE_GCC__SYNC_INT32_TAS
+#
+# NB: There are platforms where test_and_set is available but compare_and_swap
+# is not, so test this separately.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_TAS],
+[AC_CACHE_CHECK(for builtin locking functions, pgac_cv_gcc_sync_int32_tas,
+[AC_TRY_LINK([],
+  [int lock = 0;
+   __sync_lock_test_and_set(&lock, 1);
+   __sync_lock_release(&lock);],
+  [pgac_cv_gcc_sync_int32_tas="yes"],
+  [pgac_cv_gcc_sync_int32_tas="no"])])
+if test x"$pgac_cv_gcc_sync_int32_tas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT32_TAS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
+fi])# PGAC_HAVE_GCC__SYNC_INT32_TAS
+
+# PGAC_HAVE_GCC__SYNC_INT32_CAS
+# -------------------------
+# Check if the C compiler understands __sync_compare_and_swap() for 32bit
+# types, and define HAVE_GCC__SYNC_INT32_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_CAS],
+[AC_CACHE_CHECK(for builtin __sync int32 atomic operations, pgac_cv_gcc_sync_int32_cas,
+[AC_TRY_LINK([],
+  [int val = 0;
+   __sync_val_compare_and_swap(&val, 0, 37);],
+  [pgac_cv_gcc_sync_int32_cas="yes"],
+  [pgac_cv_gcc_sync_int32_cas="no"])])
+if test x"$pgac_cv_gcc_sync_int32_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT32_CAS, 1, [Define to 1 if you have __sync_compare_and_swap(int *, int, int).])
+fi])# PGAC_HAVE_GCC__SYNC_INT32_CAS
+
+# PGAC_HAVE_GCC__SYNC_INT64_CAS
+# -------------------------
+# Check if the C compiler understands __sync_compare_and_swap() for 64bit
+# types, and define HAVE_GCC__SYNC_INT64_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT64_CAS],
+[AC_CACHE_CHECK(for builtin __sync int64 atomic operations, pgac_cv_gcc_sync_int64_cas,
+[AC_TRY_LINK([],
+  [PG_INT64_TYPE lock = 0;
+   __sync_val_compare_and_swap(&lock, 0, (PG_INT64_TYPE) 37);],
+  [pgac_cv_gcc_sync_int64_cas="yes"],
+  [pgac_cv_gcc_sync_int64_cas="no"])])
+if test x"$pgac_cv_gcc_sync_int64_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT64_CAS, 1, [Define to 1 if you have __sync_compare_and_swap(int64 *, int64, int64).])
+fi])# PGAC_HAVE_GCC__SYNC_INT64_CAS
+
+# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
+# -------------------------
+# Check if the C compiler understands __atomic_compare_exchange_n() for 32bit
+# types, and define HAVE_GCC__ATOMIC_INT32_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT32_CAS],
+[AC_CACHE_CHECK(for builtin __atomic int32 atomic operations, pgac_cv_gcc_atomic_int32_cas,
+[AC_TRY_LINK([],
+  [int val = 0;
+   int expect = 0;
+   __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);],
+  [pgac_cv_gcc_atomic_int32_cas="yes"],
+  [pgac_cv_gcc_atomic_int32_cas="no"])])
+if test x"$pgac_cv_gcc_atomic_int32_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__ATOMIC_INT32_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int).])
+fi])# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
diff --git a/configure.in b/configure.in
index 304399e..1fab189 100644
--- a/configure.in
+++ b/configure.in
@@ -19,7 +19,7 @@ m4_pattern_forbid(^PGAC_)dnl to catch undefined macros
 
 AC_INIT([PostgreSQL], [9.4devel], [pgsql-bugs@postgresql.org])
 
-m4_if(m4_defn([m4_PACKAGE_VERSION]), [2.63], [], [m4_fatal([Autoconf version 2.63 is required.
+m4_if(m4_defn([m4_PACKAGE_VERSION]), [2.69], [], [m4_fatal([Autoconf version 2.69 is required.
 Untested combinations of 'autoconf' and PostgreSQL versions are not
 recommended.  You can remove the check from 'configure.in' but it is then
 your responsibility whether the result works or not.])])
@@ -1453,17 +1453,6 @@ fi
 AC_CHECK_FUNCS([strtoll strtoq], [break])
 AC_CHECK_FUNCS([strtoull strtouq], [break])
 
-AC_CACHE_CHECK([for builtin locking functions], pgac_cv_gcc_int_atomics,
-[AC_TRY_LINK([],
-  [int lock = 0;
-   __sync_lock_test_and_set(&lock, 1);
-   __sync_lock_release(&lock);],
-  [pgac_cv_gcc_int_atomics="yes"],
-  [pgac_cv_gcc_int_atomics="no"])])
-if test x"$pgac_cv_gcc_int_atomics" = x"yes"; then
-  AC_DEFINE(HAVE_GCC_INT_ATOMICS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
-fi
-
 # Lastly, restore full LIBS list and check for readline/libedit symbols
 LIBS="$LIBS_including_readline"
 
@@ -1738,6 +1727,12 @@ AC_CHECK_TYPES([int8, uint8, int64, uint64], [], [],
 # C, but is missing on some old platforms.
 AC_CHECK_TYPES(sig_atomic_t, [], [], [#include <signal.h>])
 
+# Check for various atomic operations now that we have checked how to declare
+# 64bit integers.
+PGAC_HAVE_GCC__SYNC_INT32_TAS
+PGAC_HAVE_GCC__SYNC_INT32_CAS
+PGAC_HAVE_GCC__SYNC_INT64_CAS
+PGAC_HAVE_GCC__ATOMIC_INT32_CAS
 
 if test "$PORTNAME" != "win32"
 then
diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile
index e12a854..be95d50 100644
--- a/src/backend/storage/lmgr/Makefile
+++ b/src/backend/storage/lmgr/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
+OBJS = atomics.o lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/storage/lmgr/atomics.c b/src/backend/storage/lmgr/atomics.c
new file mode 100644
index 0000000..af95153
--- /dev/null
+++ b/src/backend/storage/lmgr/atomics.c
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.c
+ *	   Non-inline parts of the atomics implementation
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/storage/lmgr/atomics.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+/*
+ * We want the functions below to be inline; but if the compiler doesn't
+ * support that, fall back on providing them as regular functions.	See
+ * STATIC_IF_INLINE in c.h.
+ */
+#define ATOMICS_INCLUDE_DEFINITIONS
+
+#include "storage/atomics.h"
+
+#ifdef PG_USE_ATOMICS_EMULATION
+#endif
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index f054be8..469dd84 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -86,14 +86,6 @@ s_unlock_sema(volatile slock_t *lock)
 	PGSemaphoreUnlock((PGSemaphore) lock);
 }
 
-bool
-s_lock_free_sema(volatile slock_t *lock)
-{
-	/* We don't currently use S_LOCK_FREE anyway */
-	elog(ERROR, "spin.c does not support S_LOCK_FREE()");
-	return false;
-}
-
 int
 tas_sema(volatile slock_t *lock)
 {
diff --git a/src/include/c.h b/src/include/c.h
index 6e19c6d..347f394 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -866,6 +866,13 @@ typedef NameData *Name;
 #define STATIC_IF_INLINE
 #endif   /* PG_USE_INLINE */
 
+#ifdef PG_USE_INLINE
+#define STATIC_IF_INLINE_DECLARE static inline
+#else
+#define STATIC_IF_INLINE_DECLARE extern
+#endif   /* PG_USE_INLINE */
+
+
 /* ----------------------------------------------------------------
  *				Section 8:	random stuff
  * ----------------------------------------------------------------
diff --git a/src/include/storage/atomics-arch-alpha.h b/src/include/storage/atomics-arch-alpha.h
new file mode 100644
index 0000000..06b7125
--- /dev/null
+++ b/src/include/storage/atomics-arch-alpha.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-alpha.h
+ *	  Atomic operations considerations specific to Alpha
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-alpha.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+/*
+ * Unlike all other known architectures, Alpha allows dependent reads to be
+ * reordered, but we don't currently find it necessary to provide a conditional
+ * read barrier to cover that case.  We might need to add that later.
+ */
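+/*
+ * Illustration (hypothetical): given
+ *     node = head;  val = node->field;
+ * Alpha may satisfy the dependent second load with stale data unless a
+ * barrier separates the two loads, even though the address of the second
+ * load depends on the result of the first.
+ */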
+#define pg_memory_barrier()		__asm__ __volatile__ ("mb" : : : "memory")
+#define pg_read_barrier()		__asm__ __volatile__ ("rmb" : : : "memory")
+#define pg_write_barrier()		__asm__ __volatile__ ("wmb" : : : "memory")
+#endif
diff --git a/src/include/storage/atomics-arch-amd64.h b/src/include/storage/atomics-arch-amd64.h
new file mode 100644
index 0000000..90f4abc
--- /dev/null
+++ b/src/include/storage/atomics-arch-amd64.h
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-amd64.h
+ *	  Atomic operations considerations specific to intel/amd x86-64
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-amd64.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * x86_64 has similar ordering characteristics to i386.
+ *
+ * Technically, some x86-ish chips support uncached memory access and/or
+ * special instructions that are weakly ordered.  In those cases we'd need
+ * the read and write barriers to be lfence and sfence.  But since we don't
+ * do those things, a compiler barrier should be enough.
+ */
+#if defined(__INTEL_COMPILER)
+#	define pg_memory_barrier_impl()		_mm_mfence()
+#elif defined(__GNUC__)
+#	define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
+#endif
+
+#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
+#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
+
+#if defined(__GNUC__)
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+static __inline__ void
+pg_spin_delay_impl(void)
+{
+	/*
+	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
+	 * Opteron, but it may be of some use on EM64T, so we keep it.
+	 */
+	__asm__ __volatile__(
+		" rep; nop			\n");
+}
+#endif
+
+#elif defined(WIN32_ONLY_COMPILER)
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	_mm_pause();
+}
+#endif
diff --git a/src/include/storage/atomics-arch-arm.h b/src/include/storage/atomics-arch-arm.h
new file mode 100644
index 0000000..a00f8f5
--- /dev/null
+++ b/src/include/storage/atomics-arch-arm.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-arm.h
+ *	  Atomic operations considerations specific to ARM
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-arm.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+
+/*
+ * FIXME: Disable 32bit atomics for ARMs before v6 or error out
+ */
+
+/*
+ * 64 bit atomics on arm are implemented using kernel fallbacks and might be
+ * slow, so disable entirely for now.
+ */
+#define PG_DISABLE_64_BIT_ATOMICS
diff --git a/src/include/storage/atomics-arch-hppa.h b/src/include/storage/atomics-arch-hppa.h
new file mode 100644
index 0000000..8913801
--- /dev/null
+++ b/src/include/storage/atomics-arch-hppa.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-hppa.h
+ *	  Atomic operations considerations specific to HPPA
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-hppa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* HPPA doesn't do either read or write reordering */
+#define pg_memory_barrier_impl()		pg_compiler_barrier_impl()
diff --git a/src/include/storage/atomics-arch-i386.h b/src/include/storage/atomics-arch-i386.h
new file mode 100644
index 0000000..151a14f
--- /dev/null
+++ b/src/include/storage/atomics-arch-i386.h
@@ -0,0 +1,96 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-i386.h
+ *	  Atomic operations considerations specific to intel x86
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-i386.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * i386 does not allow loads to be reordered with other loads, or stores to be
+ * reordered with other stores, but a load can be performed before a subsequent
+ * store.
+ *
+ * "lock; addl" has worked for longer than "mfence".
+ */
+
+#if defined(__INTEL_COMPILER)
+#define pg_memory_barrier_impl()		_mm_mfence()
+#elif defined(__GNUC__)
+#define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#endif
+
+#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
+#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
+
+#if defined(__GNUC__)
+
+/*
+ * Implement TAS if not available as a __sync builtin on very old gcc
+ * versions.
+ */
+#ifndef HAVE_GCC__SYNC_INT32_TAS
+static __inline__ int
+tas(volatile slock_t *lock)
+{
+	register slock_t _res = 1;
+
+	__asm__ __volatile__(
+		"	lock			\n"
+		"	xchgb	%0,%1	\n"
+:		"+q"(_res), "+m"(*lock)
+:
+:		"memory", "cc");
+	return (int) _res;
+}
+#endif
+
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+static __inline__ void
+pg_spin_delay_impl(void)
+{
+	/*
+	 * This sequence is equivalent to the PAUSE instruction ("rep" is
+	 * ignored by old IA32 processors if the following instruction is
+	 * not a string operation); the IA-32 Architecture Software
+	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
+	 * PAUSE in the inner loop of a spin lock is necessary for good
+	 * performance:
+	 *
+	 *     The PAUSE instruction improves the performance of IA-32
+	 *     processors supporting Hyper-Threading Technology when
+	 *     executing spin-wait loops and other routines where one
+	 *     thread is accessing a shared lock or semaphore in a tight
+	 *     polling loop. When executing a spin-wait loop, the
+	 *     processor can suffer a severe performance penalty when
+	 *     exiting the loop because it detects a possible memory order
+	 *     violation and flushes the core processor's pipeline. The
+	 *     PAUSE instruction provides a hint to the processor that the
+	 *     code sequence is a spin-wait loop. The processor uses this
+	 *     hint to avoid the memory order violation and prevent the
+	 *     pipeline flush. In addition, the PAUSE instruction
+	 *     de-pipelines the spin-wait loop to prevent it from
+	 *     consuming execution resources excessively.
+	 */
+	__asm__ __volatile__(
+		" rep; nop			\n");
+}
+#endif
+
+#elif defined(WIN32_ONLY_COMPILER)
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	/* See comment for gcc code. Same code, MASM syntax */
+	__asm rep nop;
+}
+#endif
diff --git a/src/include/storage/atomics-arch-ia64.h b/src/include/storage/atomics-arch-ia64.h
new file mode 100644
index 0000000..2863431
--- /dev/null
+++ b/src/include/storage/atomics-arch-ia64.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-ia64.h
+ *	  Atomic operations considerations specific to intel itanium
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-ia64.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Itanium is weakly ordered, so read and write barriers require a full
+ * fence.
+ */
+#if defined(__INTEL_COMPILER)
+#	define pg_memory_barrier_impl()		__mf()
+#elif defined(__GNUC__)
+#	define pg_memory_barrier_impl()		__asm__ __volatile__ ("mf" : : : "memory")
+#elif defined(__hpux)
+#	define pg_compiler_barrier_impl()	_Asm_sched_fence()
+#	define pg_memory_barrier_impl()		_Asm_mf()
+#endif
diff --git a/src/include/storage/atomics-arch-ppc.h b/src/include/storage/atomics-arch-ppc.h
new file mode 100644
index 0000000..f785c99
--- /dev/null
+++ b/src/include/storage/atomics-arch-ppc.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-ppc.h
+ *	  Atomic operations considerations specific to PowerPC
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-ppc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#endif
diff --git a/src/include/storage/atomics-generic-acc.h b/src/include/storage/atomics-generic-acc.h
new file mode 100644
index 0000000..b1c9200
--- /dev/null
+++ b/src/include/storage/atomics-generic-acc.h
@@ -0,0 +1,99 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-acc.h
+ *	  Atomic operations support when using HPs acc on HPUX
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * inline assembly for Itanium-based HP-UX:
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/Itanium/inline_assem_ERS.pdf
+ * * Implementing Spinlocks on the Intel ® Itanium ® Architecture and PA-RISC
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
+ *
+ * Itanium only supports a small set of numbers (-16, -8, -4, -1, 1, 4, 8, 16)
+ * for atomic add/sub, so we just implement everything but compare_exchange via
+ * the compare_exchange fallbacks in atomics-generic.h.
+ *
+ * src/include/storage/atomics-generic-acc.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <machine/sys/inline.h>
+
+/* IA64 always has 32/64 bit atomics */
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#define MINOR_FENCE (_Asm_fence) (_UP_CALL_FENCE | _UP_SYS_FENCE | \
+								 _DOWN_CALL_FENCE | _DOWN_SYS_FENCE )
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	/*
+	 * We want a barrier, not just release/acquire semantics.
+	 */
+	_Asm_mf();
+	/*
+	 * Notes:
+	 * DOWN_MEM_FENCE | _UP_MEM_FENCE prevents reordering by the compiler
+	 */
+	current =  _Asm_cmpxchg(_SZ_W, /* word */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	_Asm_mf();
+	current =  _Asm_cmpxchg(_SZ_D, /* doubleword */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#undef MINOR_FENCE
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-gcc.h b/src/include/storage/atomics-generic-gcc.h
new file mode 100644
index 0000000..0cc0b16
--- /dev/null
+++ b/src/include/storage/atomics-generic-gcc.h
@@ -0,0 +1,174 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-gcc.h
+ *	  Atomic operations, implemented using gcc (or compatible) intrinsics.
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Legacy __sync Built-in Functions for Atomic Memory Access
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html
+ * * Built-in functions for memory model aware atomic operations
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html
+ *
+ * src/include/storage/atomics-generic-gcc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+#define pg_compiler_barrier_impl()	__asm__ __volatile__("" ::: "memory")
+
+/*
+ * If we're on GCC 4.1.0 or higher, we should be able to get a memory
+ * barrier out of this compiler built-in.  But we prefer to rely on our
+ * own definitions where possible, and use this only as a fallback.
+ */
+#if !defined(pg_memory_barrier_impl)
+#	if defined(HAVE_GCC__ATOMIC_INT64_CAS)
+#		define pg_memory_barrier_impl()		__atomic_thread_fence(__ATOMIC_SEQ_CST)
+#	elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+#		define pg_memory_barrier_impl()		__sync_synchronize()
+#	endif
+#endif /* !defined(pg_memory_barrier_impl) */
+
+#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
+#		define pg_read_barrier_impl()		__atomic_thread_fence(__ATOMIC_ACQUIRE)
+#endif
+
+#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
+#		define pg_write_barrier_impl()		__atomic_thread_fence(__ATOMIC_RELEASE)
+#endif
+
+#ifdef HAVE_GCC__SYNC_INT32_TAS
+typedef struct pg_atomic_flag
+{
+	volatile uint32 value;
+} pg_atomic_flag;
+
+#define PG_ATOMIC_INIT_FLAG(flag) {0}
+#endif
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#if defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#	define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#if !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(HAVE_GCC__SYNC_INT32_TAS)
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	uint32 ret;
+	/* XXX: only an acquire barrier, so use a full one for now */
+	pg_memory_barrier_impl();
+	/* some platforms only support storing a 1 here */
+	ret = __sync_lock_test_and_set(&ptr->value, 1);
+	return ret == 0;
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return ptr->value;
+}
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: only a release barrier, so use a full one for now */
+	pg_memory_barrier_impl();
+	__sync_lock_release(&ptr->value);
+}
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+#endif /* !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(HAVE_GCC__SYNC_INT32_TAS) */
+
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#else
+#	error "strange configuration"
+#endif
+
+#if defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)
+
+/* if __atomic is supported for 32bit, it's also supported for 64bit */
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#else
+#	error "strange configuration"
+#endif
+
+#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-msvc.h b/src/include/storage/atomics-generic-msvc.h
new file mode 100644
index 0000000..724d9ee
--- /dev/null
+++ b/src/include/storage/atomics-generic-msvc.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-msvc.h
+ *	  Atomic operations support when using MSVC
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Interlocked Variable Access
+ *   http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx
+ *
+ * src/include/storage/atomics-generic-msvc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <intrin.h>
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+/* Should work on both MSVC and Borland. */
+#pragma intrinsic(_ReadWriteBarrier)
+#define pg_compiler_barrier_impl()	_ReadWriteBarrier()
+
+#ifndef pg_memory_barrier_impl
+#define pg_memory_barrier_impl()		MemoryBarrier()
+#endif
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = InterlockedCompareExchange(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+/*
+ * FIXME: Only available on Vista/2003 onwards, add test for that.
+ *
+ * Only supported on >486, but we require XP as a minimum baseline, which
+ * doesn't support the 486, so we don't need to add checks for that case.
+ */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = InterlockedCompareExchange64(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-sunpro.h b/src/include/storage/atomics-generic-sunpro.h
new file mode 100644
index 0000000..d512409
--- /dev/null
+++ b/src/include/storage/atomics-generic-sunpro.h
@@ -0,0 +1,66 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-sunpro.h
+ *	  Atomic operations for the Sun/Oracle SunPro compiler on Solaris
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * manpage for atomic_cas(3C)
+ *   http://www.unix.com/man-page/opensolaris/3c/atomic_cas/
+ *   http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html
+ *
+ * src/include/storage/atomics-generic-sunpro.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <atomic.h>
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	current = atomic_cas_32(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	current = atomic_cas_64(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#endif
diff --git a/src/include/storage/atomics-generic-xlc.h b/src/include/storage/atomics-generic-xlc.h
new file mode 100644
index 0000000..6a47fe2
--- /dev/null
+++ b/src/include/storage/atomics-generic-xlc.h
@@ -0,0 +1,84 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-xlc.h
+ *	  Atomic operations for IBM's xlc compiler
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Synchronization and atomic built-in functions
+ *   http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/topic/com.ibm.xlcpp8l.doc/compiler/ref/bif_sync.htm
+ *
+ * src/include/storage/atomics-generic-xlc.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <atomic.h>
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+
+/* FIXME: check for 64bit mode, only supported there */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+
+	/*
+	 * xlc's documentation tells us:
+	 * "If __compare_and_swap is used as a locking primitive, insert a call to
+	 * the __isync built-in function at the start of any critical sections."
+	 */
+	__isync();
+
+	/*
+	 * XXX: __compare_and_swap is defined to take signed parameters, but that
+	 * shouldn't matter since we don't perform any arithmetic operations.  It
+	 * stores the old value in *expected and returns whether the swap was
+	 * performed.
+	 */
+	ret = __compare_and_swap((volatile int *) &ptr->value,
+							 (int *) expected, (int) newval);
+	return ret;
+}
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+
+	__isync();
+
+	ret = __compare_and_swaplp((volatile long *) &ptr->value,
+							   (long *) expected, (long) newval);
+	return ret;
+}
+#endif /* defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) */
+
+#endif
diff --git a/src/include/storage/atomics-generic.h b/src/include/storage/atomics-generic.h
new file mode 100644
index 0000000..eff8495
--- /dev/null
+++ b/src/include/storage/atomics-generic.h
@@ -0,0 +1,402 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic.h
+ *	  Atomic operations.
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * src/include/storage/atomics-generic.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#	error "should be included via atomics.h"
+#endif
+
+/* Need a way to implement spinlocks */
+#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && \
+	!defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && \
+	!defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#	error "postgres requires atomic ops for locking to be provided"
+#endif
+
+#ifndef pg_memory_barrier_impl
+#	error "postgres requires a memory barrier implementation"
+#endif
+
+#ifndef pg_compiler_barrier_impl
+#	error "postgres requires a compiler barrier implementation"
+#endif
+
+/*
+ * If read or write barriers are undefined, we upgrade them to full memory
+ * barriers.
+ */
+#if !defined(pg_read_barrier_impl)
+#	define pg_read_barrier_impl pg_memory_barrier_impl
+#endif
+#if !defined(pg_write_barrier_impl)
+#	define pg_write_barrier_impl pg_memory_barrier_impl
+#endif
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+#define pg_spin_delay_impl()	((void)0)
+#endif
+
+
+/* provide fallback */
+#ifndef PG_HAS_ATOMIC_TEST_SET_FLAG
+typedef pg_atomic_uint32 pg_atomic_flag;
+#define PG_ATOMIC_INIT_FLAG(flag) {0}
+#endif
+
+#if !defined(PG_DISABLE_64_BIT_ATOMICS) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#	define PG_HAVE_64_BIT_ATOMICS
+#endif
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#ifndef PG_HAS_ATOMIC_READ_U32
+#define PG_HAS_ATOMIC_READ_U32
+STATIC_IF_INLINE uint32
+pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr)
+{
+	return ptr->value;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_WRITE_U32
+#define PG_HAS_ATOMIC_WRITE_U32
+STATIC_IF_INLINE void
+pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	ptr->value = val;
+}
+#endif
+
+/*
+ * provide fallback for test_and_set using atomic_exchange if available
+ */
+#if !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32)
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_exchange_u32_impl(ptr, 1) == 0;
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return (bool) pg_atomic_read_u32_impl(ptr);
+}
+
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier();
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+/*
+ * provide fallback for test_and_set using atomic_compare_exchange if
+ * available.
+ */
+#elif !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	uint32 value = 0;
+	return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1);
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return (bool) pg_atomic_read_u32_impl(ptr);
+}
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier();
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#elif !defined(PG_HAS_ATOMIC_TEST_SET_FLAG)
+#	error "No pg_atomic_test_and_set provided"
+#endif /* !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) */
+
+
+#ifndef PG_HAS_ATOMIC_INIT_U32
+#define PG_HAS_ATOMIC_INIT_U32
+STATIC_IF_INLINE void
+pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
+{
+	pg_atomic_write_u32_impl(ptr, val_);
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_EXCHANGE_U32
+#define PG_HAS_ATOMIC_EXCHANGE_U32
+STATIC_IF_INLINE uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_U32
+#define PG_HAS_ATOMIC_FETCH_ADD_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_SUB_U32
+#define PG_HAS_ATOMIC_FETCH_SUB_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old - sub_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_AND_U32
+#define PG_HAS_ATOMIC_FETCH_AND_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_OR_U32
+#define PG_HAS_ATOMIC_FETCH_OR_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_ADD_FETCH_U32
+#define PG_HAS_ATOMIC_ADD_FETCH_U32
+STATIC_IF_INLINE uint32
+pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_SUB_FETCH_U32
+#define PG_HAS_ATOMIC_SUB_FETCH_U32
+STATIC_IF_INLINE uint32
+pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_UNTIL_U32
+#define PG_HAS_ATOMIC_FETCH_ADD_UNTIL_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_until_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_, uint32 until)
+{
+	uint32 old;
+	while (true)
+	{
+		uint32 new_val;
+		old = pg_atomic_read_u32_impl(ptr);
+		if (old >= until)
+			break;
+
+		new_val = old + add_;
+		if (new_val > until)
+			new_val = until;
+
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, new_val))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#ifndef PG_HAS_ATOMIC_INIT_U64
+#define PG_HAS_ATOMIC_INIT_U64
+STATIC_IF_INLINE void
+pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
+{
+	pg_atomic_write_u64_impl(ptr, val_);
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_READ_U64
+#define PG_HAS_ATOMIC_READ_U64
+STATIC_IF_INLINE uint64
+pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr)
+{
+	return ptr->value;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_WRITE_U64
+#define PG_HAS_ATOMIC_WRITE_U64
+STATIC_IF_INLINE void
+pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	ptr->value = val;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_U64
+#define PG_HAS_ATOMIC_FETCH_ADD_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_SUB_U64
+#define PG_HAS_ATOMIC_FETCH_SUB_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old - sub_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_AND_U64
+#define PG_HAS_ATOMIC_FETCH_AND_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_OR_U64
+#define PG_HAS_ATOMIC_FETCH_OR_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_ADD_FETCH_U64
+#define PG_HAS_ATOMIC_ADD_FETCH_U64
+STATIC_IF_INLINE uint64
+pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_SUB_FETCH_U64
+#define PG_HAS_ATOMIC_SUB_FETCH_U64
+STATIC_IF_INLINE uint64
+pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_;
+}
+#endif
+
+#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics.h b/src/include/storage/atomics.h
new file mode 100644
index 0000000..39ed336
--- /dev/null
+++ b/src/include/storage/atomics.h
@@ -0,0 +1,543 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.h
+ *	  Atomic operations.
+ *
+ * Hardware and compiler dependent functions for manipulating memory
+ * atomically and dealing with cache coherency. Used to implement
+ * locking facilities and replacements.
+ *
+ * To bring up postgres on a platform/compiler, at the very least one of
+ * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag()
+ * * pg_atomic_compare_exchange_u32()
+ * * pg_atomic_exchange_u32()
+ * as well as
+ * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
+ * needs to be implemented; a sketch of such a port follows below. There
+ * are generic, hardware independent implementations for several compilers
+ * which might be sufficient, although possibly not optimal, for a new
+ * platform.
+ *
+ * Use higher level functionality (lwlocks, spinlocks, heavyweight
+ * locks) whenever possible. Writing correct code using these
+ * facilities is hard.
+ *
+ * For an introduction to using memory barriers within the PostgreSQL backend,
+ * see src/backend/storage/lmgr/README.barrier
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/atomics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ATOMICS_H
+#define ATOMICS_H
+
+#define INSIDE_ATOMICS_H
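+
+/*
+ * Illustrative sketch (hypothetical platform) of the minimum a port has to
+ * provide before atomics-generic.h can derive the remaining operations:
+ *
+ *     #define pg_compiler_barrier_impl()	...
+ *     #define pg_memory_barrier_impl()	...
+ *
+ *     typedef struct pg_atomic_uint32
+ *     {
+ *         volatile uint32 value;
+ *     } pg_atomic_uint32;
+ *
+ *     #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+ *     static bool pg_atomic_compare_exchange_u32_impl(...);
+ */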
+
+/*
+ * Architecture specific implementations/considerations.
+ */
+#if defined(__arm__) || defined(__arm) || \
+	defined(__aarch64__) || defined(__aarch64)
+#	include "storage/atomics-arch-arm.h"
+#endif
+
+#if defined(__i386__) || defined(__i386)
+#	include "storage/atomics-arch-i368.h"
+#elif defined(__x86_64__)
+#	include "storage/atomics-arch-amd64.h"
+#elif defined(__ia64__) || defined(__ia64)
+#	include "storage/atomics-arch-amd64.h"
+#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#	include "storage/atomics-arch-ppc.h"
+#elif defined(__alpha) || defined(__alpha__)
+#	include "storage/atomics-arch-alpha.h"
+#elif defined(__hppa) || defined(__hppa__)
+#	include "storage/atomics-arch-hppa.h"
+#endif
+
+/*
+ * Compiler specific, but architecture independent implementations
+ */
+
+/* gcc or compatible, including icc */
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)
+#	include "storage/atomics-generic-gcc.h"
+#elif defined(WIN32_ONLY_COMPILER)
+#	include "storage/atomics-generic-msvc.h"
+#elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
+#	include "storage/atomics-generic-acc.h"
+#elif defined(__SUNPRO_C) && !defined(__GNUC__)
+#	include "storage/atomics-generic-sunpro.h"
+#elif (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__)
+#	include "storage/atomics-generic-xlc.h"
+#else
+#	error "unsupported compiler"
+#endif
+
+/* if we have spinlocks, but not atomic ops, emulate them */
+#if defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && \
+	!defined(PG_HAVE_ATOMIC_EXCHANGE_U32)
+#	define PG_USE_ATOMICS_EMULATION
+#	include "storage/atomics-generic-spinlock.h"
+#endif
+
+
+/*
+ * Provide fallback implementations for operations that aren't provided if
+ * possible.
+ */
+#include "storage/atomics-generic.h"
+
+/*
+ * pg_compiler_barrier - prevent the compiler from moving code
+ *
+ * A compiler barrier need not (and preferably should not) emit any actual
+ * machine code, but must act as an optimization fence: the compiler must not
+ * reorder loads or stores to main memory around the barrier.  However, the
+ * CPU may still reorder loads or stores at runtime, if the architecture's
+ * memory model permits this.
+ */
+#define pg_compiler_barrier()	pg_compiler_barrier_impl()
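+
+/*
+ * Illustration (hypothetical): without a barrier the compiler may reorder
+ *     shared_data = 42;  dirty = true;
+ * into the opposite store order, since it sees no dependency between the
+ * two variables.  A pg_compiler_barrier() between the stores forbids that
+ * compile-time reordering, but not reordering done by the CPU.
+ */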
+
+/*
+ * pg_memory_barrier - prevent the CPU from reordering memory access
+ *
+ * A memory barrier must act as a compiler barrier, and in addition must
+ * guarantee that all loads and stores issued prior to the barrier are
+ * completed before any loads or stores issued after the barrier.  Unless
+ * loads and stores are totally ordered (which is not the case on most
+ * architectures) this requires issuing some sort of memory fencing
+ * instruction.
+ */
+#define pg_memory_barrier()	pg_memory_barrier_impl()
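+
+/*
+ * Illustration (hypothetical): with two backends running
+ *     P1: x = 1;  r1 = y;          P2: y = 1;  r2 = x;
+ * both r1 == 0 and r2 == 0 can be observed on most hardware unless each
+ * backend issues pg_memory_barrier() between its store and its load;
+ * neither a read nor a write barrier is strong enough for this case.
+ */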
+
+/*
+ * pg_(read|write)_barrier - prevent the CPU from reordering memory access
+ *
+ * A read barrier must act as a compiler barrier, and in addition must
+ * guarantee that any loads issued prior to the barrier are completed before
+ * any loads issued after the barrier.	Similarly, a write barrier acts
+ * as a compiler barrier, and also orders stores.  Read and write barriers
+ * are thus weaker than a full memory barrier, but stronger than a compiler
+ * barrier.  In practice, on machines with strong memory ordering, read and
+ * write barriers may require nothing more than a compiler barrier.
+ */
+#define pg_read_barrier()	pg_read_barrier_impl()
+#define pg_write_barrier()	pg_write_barrier_impl()
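+
+/*
+ * Illustration (hypothetical): a typical pairing is a producer doing
+ *     buf->data = d;
+ *     pg_write_barrier();
+ *     buf->ready = true;
+ * with a consumer doing
+ *     if (buf->ready)
+ *     {
+ *         pg_read_barrier();
+ *         d = buf->data;
+ *     }
+ * The write barrier keeps the data store ahead of the flag store, the
+ * read barrier keeps the flag load ahead of the data load.
+ */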
+
+/*
+ * Spinloop delay -
+ */
+#define pg_spin_delay()	pg_spin_delay_impl()
+
+
+/*
+ * pg_atomic_init_flag - initialize atomic flag
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_test_set_flag - TAS()
+ *
+ * Full barrier semantics. (XXX? Only acquire?)
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_unlocked_test_flag - test flag without locking it
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_clear_flag - release lock set by TAS()
+ *
+ * Full barrier semantics. (XXX? Only release?)
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_clear_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_init_u32 - initialize atomic variable
+ *
+ * Has to be done before any usage except when the atomic variable is declared
+ * statically in which case the variable is initialized to 0.
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val);
+
+/*
+ * pg_atomic_read_u32 - read from atomic variable
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr);
+
+/*
+ * pg_atomic_write_u32 - write to atomic variable
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val);
+
+/*
+ * pg_atomic_exchange_u32 - exchange newval with current value
+ *
+ * Returns the old value of 'ptr' before the swap.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval);
+
+/*
+ * pg_atomic_compare_exchange_u32 - CAS operation
+ *
+ * Atomically compare the current value of ptr with *expected and store newval
+ * iff ptr and *expected have the same value. The current value of *ptr will
+ * always be stored in *expected.
+ *
+ * Return whether the values have been exchanged.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
+							   uint32 *expected, uint32 newval);
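+
+/*
+ * Illustrative usage (hypothetical): atomically increment a variable,
+ * retrying on concurrent modification.  A failing CAS refreshes *expected,
+ * so no explicit re-read is needed:
+ *
+ *     uint32 expected = pg_atomic_read_u32(&var);
+ *     while (!pg_atomic_compare_exchange_u32(&var, &expected, expected + 1))
+ *         ;
+ */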
+
+/*
+ * pg_atomic_fetch_add_u32 - atomically add to variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, uint32 add_);
+
+/*
+ * pg_atomic_fetch_sub_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_);
+
+/*
+ * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_);
+
+/*
+ * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_);
+
+/*
+ * pg_atomic_add_fetch_u32 - atomically add to variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 add_);
+
+/*
+ * pg_atomic_sub_fetch_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_);
+
+/*
+ * pg_atomic_fetch_add_until_u32 - saturated addition to variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_add_until_u32(volatile pg_atomic_uint32 *ptr, uint32 add_,
+							  uint32 until);
+
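+/*
+ * Illustrative sketch, not part of this patch: one plausible
+ * compare-exchange based emulation, under the assumption that the addition
+ * simply stops once 'until' has been reached:
+ *
+ *	uint32 old = pg_atomic_read_u32(ptr);
+ *	while (old < until &&
+ *		   !pg_atomic_compare_exchange_u32(ptr, &old, old + add_))
+ *		;
+ */
+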
+/* ----
+ * The 64 bit operations have the same semantics as their 32 bit
+ * counterparts, if they are available.
+ * ----
+ */
+#ifdef PG_HAVE_64_BIT_ATOMICS
+
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr);
+
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval);
+
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
+							   uint64 *expected, uint64 newval);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, uint64 add_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 add_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_);
+
+#endif /* PG_HAVE_64_BIT_ATOMICS */
+
+/*
+ * The following functions are wrapper functions around the platform specific
+ * implementation of the atomic operations performing common checks.
+ */
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#define CHECK_POINTER_ALIGNMENT(ptr, bndr) \
+	Assert(TYPEALIGN(bndr, (uintptr_t)(ptr)) == (uintptr_t)(ptr))
+
+
+STATIC_IF_INLINE void
+pg_atomic_init_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	return pg_atomic_test_set_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	return pg_atomic_unlocked_test_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_clear_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	pg_atomic_init_u32_impl(ptr, val);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	pg_atomic_write_u32_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_read_u32_impl(ptr);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	return pg_atomic_exchange_u32_impl(ptr, newval);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
+							   uint32 *expected, uint32 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	CHECK_POINTER_ALIGNMENT(expected, 4);
+
+	return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_add_u32_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_and_u32_impl(ptr, and_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_or_u32_impl(ptr, or_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_add_fetch_u32_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_sub_fetch_u32_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_until_u32(volatile pg_atomic_uint32 *ptr, uint32 add_,
+							  uint32 until)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_add_until_u32_impl(ptr, add_, until);
+}
+
+#ifdef PG_HAVE_64_BIT_ATOMICS
+
+STATIC_IF_INLINE void
+pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+
+	pg_atomic_init_u64_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_read_u64_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	pg_atomic_write_u64_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+
+	return pg_atomic_exchange_u64_impl(ptr, newval);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
+							   uint64 *expected, uint64 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	CHECK_POINTER_ALIGNMENT(expected, 8);
+	return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_add_u64_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_and_u64_impl(ptr, and_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_or_u64_impl(ptr, or_);
+}
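+
+/*
+ * Wrappers for the add_fetch/sub_fetch variants declared above; sketches
+ * following the 32 bit wrappers, assuming corresponding _impl functions are
+ * provided by the platform-specific headers.
+ */
+STATIC_IF_INLINE uint64
+pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_add_fetch_u64_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_sub_fetch_u64_impl(ptr, sub_);
+}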
+#endif /* PG_HAVE_64_BIT_ATOMICS */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
+
+#undef INSIDE_ATOMICS_H
+
+#endif /* ATOMICS_H */
diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h
index 16a9a0c..607a3c9 100644
--- a/src/include/storage/barrier.h
+++ b/src/include/storage/barrier.h
@@ -13,137 +13,12 @@
 #ifndef BARRIER_H
 #define BARRIER_H
 
-#include "storage/s_lock.h"
-
-extern slock_t dummy_spinlock;
-
-/*
- * A compiler barrier need not (and preferably should not) emit any actual
- * machine code, but must act as an optimization fence: the compiler must not
- * reorder loads or stores to main memory around the barrier.  However, the
- * CPU may still reorder loads or stores at runtime, if the architecture's
- * memory model permits this.
- *
- * A memory barrier must act as a compiler barrier, and in addition must
- * guarantee that all loads and stores issued prior to the barrier are
- * completed before any loads or stores issued after the barrier.  Unless
- * loads and stores are totally ordered (which is not the case on most
- * architectures) this requires issuing some sort of memory fencing
- * instruction.
- *
- * A read barrier must act as a compiler barrier, and in addition must
- * guarantee that any loads issued prior to the barrier are completed before
- * any loads issued after the barrier.	Similarly, a write barrier acts
- * as a compiler barrier, and also orders stores.  Read and write barriers
- * are thus weaker than a full memory barrier, but stronger than a compiler
- * barrier.  In practice, on machines with strong memory ordering, read and
- * write barriers may require nothing more than a compiler barrier.
- *
- * For an introduction to using memory barriers within the PostgreSQL backend,
- * see src/backend/storage/lmgr/README.barrier
- */
-
-#if defined(DISABLE_BARRIERS)
-
-/*
- * Fall through to the spinlock-based implementation.
- */
-#elif defined(__INTEL_COMPILER)
-
-/*
- * icc defines __GNUC__, but doesn't support gcc's inline asm syntax
- */
-#if defined(__ia64__) || defined(__ia64)
-#define pg_memory_barrier()		__mf()
-#elif defined(__i386__) || defined(__x86_64__)
-#define pg_memory_barrier()		_mm_mfence()
-#endif
-
-#define pg_compiler_barrier()	__memory_barrier()
-#elif defined(__GNUC__)
-
-/* This works on any architecture, since it's only talking to GCC itself. */
-#define pg_compiler_barrier()	__asm__ __volatile__("" : : : "memory")
-
-#if defined(__i386__)
-
 /*
- * i386 does not allow loads to be reordered with other loads, or stores to be
- * reordered with other stores, but a load can be performed before a subsequent
- * store.
- *
- * "lock; addl" has worked for longer than "mfence".
+ * This used to be a separate file, full of compiler/architecture
+ * dependent defines, but it's now included in the atomics.h
+ * infrastructure and just kept for backward compatibility.
  */
-#define pg_memory_barrier()		\
-	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
-#define pg_read_barrier()		pg_compiler_barrier()
-#define pg_write_barrier()		pg_compiler_barrier()
-#elif defined(__x86_64__)		/* 64 bit x86 */
-
-/*
- * x86_64 has similar ordering characteristics to i386.
- *
- * Technically, some x86-ish chips support uncached memory access and/or
- * special instructions that are weakly ordered.  In those cases we'd need
- * the read and write barriers to be lfence and sfence.  But since we don't
- * do those things, a compiler barrier should be enough.
- */
-#define pg_memory_barrier()		\
-	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
-#define pg_read_barrier()		pg_compiler_barrier()
-#define pg_write_barrier()		pg_compiler_barrier()
-#elif defined(__ia64__) || defined(__ia64)
-
-/*
- * Itanium is weakly ordered, so read and write barriers require a full
- * fence.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("mf" : : : "memory")
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-
-/*
- * lwsync orders loads with respect to each other, and similarly with stores.
- * But a load can be performed before a subsequent store, so sync must be used
- * for a full memory barrier.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("sync" : : : "memory")
-#define pg_read_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
-#define pg_write_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
-#elif defined(__alpha) || defined(__alpha__)	/* Alpha */
-
-/*
- * Unlike all other known architectures, Alpha allows dependent reads to be
- * reordered, but we don't currently find it necessary to provide a conditional
- * read barrier to cover that case.  We might need to add that later.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("mb" : : : "memory")
-#define pg_read_barrier()		__asm__ __volatile__ ("rmb" : : : "memory")
-#define pg_write_barrier()		__asm__ __volatile__ ("wmb" : : : "memory")
-#elif defined(__hppa) || defined(__hppa__)		/* HP PA-RISC */
-
-/* HPPA doesn't do either read or write reordering */
-#define pg_memory_barrier()		pg_compiler_barrier()
-#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
-
-/*
- * If we're on GCC 4.1.0 or higher, we should be able to get a memory
- * barrier out of this compiler built-in.  But we prefer to rely on our
- * own definitions where possible, and use this only as a fallback.
- */
-#define pg_memory_barrier()		__sync_synchronize()
-#endif
-#elif defined(__ia64__) || defined(__ia64)
-
-#define pg_compiler_barrier()	_Asm_sched_fence()
-#define pg_memory_barrier()		_Asm_mf()
-#elif defined(WIN32_ONLY_COMPILER)
-
-/* Should work on both MSVC and Borland. */
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-#define pg_compiler_barrier()	_ReadWriteBarrier()
-#define pg_memory_barrier()		MemoryBarrier()
-#endif
+#include "storage/atomics.h"
 
 /*
  * If we have no memory barrier implementation for this architecture, we
@@ -157,6 +32,10 @@ extern slock_t dummy_spinlock;
  * fence.  But all of our actual implementations seem OK in this regard.
  */
 #if !defined(pg_memory_barrier)
+#include "storage/s_lock.h"
+
+extern slock_t dummy_spinlock;
+
 #define pg_memory_barrier() \
 	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
 #endif
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7dcd5d9..73aebfa 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -21,10 +21,6 @@
  *	void S_UNLOCK(slock_t *lock)
  *		Unlock a previously acquired lock.
  *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns TRUE if free, FALSE if locked.
- *		This does *not* change the state of the lock.
- *
  *	void SPIN_DELAY(void)
  *		Delay operation to occur inside spinlock wait loop.
  *
@@ -94,879 +90,17 @@
 #ifndef S_LOCK_H
 #define S_LOCK_H
 
+#include "storage/atomics.h"
 #include "storage/pg_sema.h"
 
-#ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */
-
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-#if defined(__ia64__) || defined(__ia64)
-/*
- * Intel Itanium, gcc or Intel's compiler.
- *
- * Itanium has weak memory ordering, but we rely on the compiler to enforce
- * strict ordering of accesses to volatile data.  In particular, while the
- * xchg instruction implicitly acts as a memory barrier with 'acquire'
- * semantics, we do not have an explicit memory fence instruction in the
- * S_UNLOCK macro.  We use a regular assignment to clear the spinlock, and
- * trust that the compiler marks the generated store instruction with the
- * ".rel" opcode.
- *
- * Testing shows that assumption to hold on gcc, although I could not find
- * any explicit statement on that in the gcc manual.  In Intel's compiler,
- * the -m[no-]serialize-volatile option controls that, and testing shows that
- * it is enabled by default.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On IA64, it's a win to use a non-locking test before the xchg proper */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-#ifndef __INTEL_COMPILER
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	long int	ret;
-
-	__asm__ __volatile__(
-		"	xchg4 	%0=%1,%2	\n"
-:		"=r"(ret), "+m"(*lock)
-:		"r"(1)
-:		"memory");
-	return (int) ret;
-}
-
-#else /* __INTEL_COMPILER */
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int		ret;
-
-	ret = _InterlockedExchange(lock,1);	/* this is a xchg asm macro */
-
-	return ret;
-}
-
-#endif /* __INTEL_COMPILER */
-#endif	 /* __ia64__ || __ia64 */
-
-
-/*
- * On ARM, we use __sync_lock_test_and_set(int *, int) if available, and if
- * not fall back on the SWPB instruction.  SWPB does not work on ARMv6 or
- * later, so the compiler builtin is preferred if available.  Note also that
- * the int-width variant of the builtin works on more chips than other widths.
- */
-#if defined(__arm__) || defined(__arm)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-#ifdef HAVE_GCC_INT_ATOMICS
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#else /* !HAVE_GCC_INT_ATOMICS */
-
-typedef unsigned char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	__asm__ __volatile__(
-		"	swpb 	%0, %0, [%2]	\n"
-:		"+r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return (int) _res;
-}
-
-#endif	 /* HAVE_GCC_INT_ATOMICS */
-#endif	 /* __arm__ */
-
-
-/*
- * On ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- */
-#if defined(__aarch64__) || defined(__aarch64)
-#ifdef HAVE_GCC_INT_ATOMICS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC_INT_ATOMICS */
-#endif	 /* __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res;
-
-	/*
-	 *	See comment in /pg/backend/port/tas/solaris_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return (int) _res;
-}
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * On newer machines, we can use lwsync instead for better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-#ifdef USE_PPC_LWARX_MUTEX_HINT
-"	lwarx   %0,0,%3,1	\n"
-#else
-"	lwarx   %0,0,%3		\n"
-#endif
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f         	\n"
-"1:	li      %1,1		\n"
-"	b		3f			\n"
-"2:						\n"
-#ifdef USE_PPC_LWSYNC
-"	lwsync				\n"
-#else
-"	isync				\n"
-#endif
-"	li      %1,0		\n"
-"3:						\n"
-
-:	"=&r"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * On newer machines, we can use lwsync instead for better performance.
- */
-#ifdef USE_PPC_LWSYNC
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	sync \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif /* USE_PPC_LWSYNC */
-
-#endif /* powerpc */
-
-
-/* Linux Motorola 68k */
-#if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int rv;
-
-	__asm__	__volatile__(
-		"	clrl	%0		\n"
-		"	tas		%1		\n"
-		"	sne		%0		\n"
-:		"=d"(rv), "+m"(*lock)
-:
-:		"memory", "cc");
-	return rv;
-}
-
-#endif	 /* (__mc68000__ || __m68k__) && __linux__ */
-
-
-/*
- * VAXen -- even multiprocessor ones
- * (thanks to Tom Ivar Helbekkmo)
- */
-#if defined(__vax__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int	_res;
-
-	__asm__ __volatile__(
-		"	movl 	$1, %0			\n"
-		"	bbssi	$0, (%2), 1f	\n"
-		"	clrl	%0				\n"
-		"1: \n"
-:		"=&r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return _res;
-}
-
-#endif	 /* __vax__ */
-
-#if defined(__alpha) || defined(__alpha__)	/* Alpha */
-/*
- * Correct multi-processor locking methods are explained in section 5.5.3
- * of the Alpha AXP Architecture Handbook, which at this writing can be
- * found at ftp://ftp.netbsd.org/pub/NetBSD/misc/dec-docs/index.html.
- * For gcc we implement the handbook's code directly with inline assembler.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned long slock_t;
-
-#define TAS(lock)  tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res;
-
-	__asm__	__volatile__(
-		"	ldq		$0, %1	\n"
-		"	bne		$0, 2f	\n"
-		"	ldq_l	%0, %1	\n"
-		"	bne		%0, 2f	\n"
-		"	mov		1,  $0	\n"
-		"	stq_c	$0, %1	\n"
-		"	beq		$0, 2f	\n"
-		"	mb				\n"
-		"	br		3f		\n"
-		"2:	mov		1, %0	\n"
-		"3:					\n"
-:		"=&r"(_res), "+m"(*lock)
-:
-:		"memory", "0");
-	return (int) _res;
-}
-
-#define S_UNLOCK(lock)	\
-do \
-{\
-	__asm__ __volatile__ ("	mb \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __alpha || __alpha__ */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-/* Note: on SGI we use the OS' mutex ABI, see below */
-/* Note: R10000 processors require a separate SYNC */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register volatile slock_t *_l = lock;
-	register int _res;
-	register int _tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		"       .set mips2          \n"
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		"       .set mips2          \n" \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              "); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-#if defined(__m32r__) && defined(HAVE_SYS_TAS_H)	/* Renesas' M32R */
-#define HAS_TEST_AND_SET
-
-#include <sys/tas.h>
-
-typedef int slock_t;
-
-#define TAS(lock) tas(lock)
-
-#endif /* __m32r__ */
-
-
-#if defined(__sh__)				/* Renesas' SuperH */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int _res;
-
-	/*
-	 * This asm is coded as if %0 could be any register, but actually SuperH
-	 * restricts the target of xor-immediate to be R0.  That's handled by
-	 * the "z" constraint on _res.
-	 */
-	__asm__ __volatile__(
-		"	tas.b @%2    \n"
-		"	movt  %0     \n"
-		"	xor   #1,%0  \n"
-:		"=z"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory", "t");
-	return _res;
-}
-
-#endif	 /* __sh__ */
-
-
-/* These live in s_lock.c, but only for gcc */
-
-
-#if defined(__m68k__) && !defined(__linux__)	/* non-Linux Motorola 68k */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-#endif
-
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-
-#if defined(USE_UNIVEL_CC)		/* Unixware compiler */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock)	tas(lock)
-
-asm int
-tas(volatile slock_t *s_lock)
-{
-/* UNIVEL wants %mem in column 1, so we don't pg_indent this file */
-%mem s_lock
-	pushl %ebx
-	movl s_lock, %ebx
-	movl $255, %eax
-	lock
-	xchgb %al, (%ebx)
-	popl %ebx
-}
-
-#endif	 /* defined(USE_UNIVEL_CC) */
-
-
-#if defined(__alpha) || defined(__alpha__)	/* Tru64 Unix Alpha compiler */
-/*
- * The Tru64 compiler doesn't support gcc-style inline asm, but it does
- * have some builtin functions that accomplish much the same results.
- * For simplicity, slock_t is defined as long (ie, quadword) on Alpha
- * regardless of the compiler in use.  LOCK_LONG and UNLOCK_LONG only
- * operate on an int (ie, longword), but that's OK as long as we define
- * S_INIT_LOCK to zero out the whole quadword.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned long slock_t;
-
-#include <alpha/builtins.h>
-#define S_INIT_LOCK(lock)  (*(lock) = 0)
-#define TAS(lock)		   (__LOCK_LONG_RETRY((lock), 1) == 0)
-#define S_UNLOCK(lock)	   __UNLOCK_LONG(lock)
-
-#endif	 /* __alpha || __alpha__ */
-
-
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
-/*
- * HP's PA-RISC
- *
- * See src/backend/port/hpux/tas.c.template for details about LDCWX.  Because
- * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte
- * struct.  The active word in the struct is whichever has the aligned address;
- * the other three words just sit at -1.
- *
- * When using gcc, we can inline the required assembly code.
- */
-#define HAS_TEST_AND_SET
-
-typedef struct
-{
-	int			sema[4];
-} slock_t;
-
-#define TAS_ACTIVE_WORD(lock)	((volatile int *) (((uintptr_t) (lock) + 15) & ~15))
-
-#if defined(__GNUC__)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile int *lockword = TAS_ACTIVE_WORD(lock);
-	register int lockval;
-
-	__asm__ __volatile__(
-		"	ldcwx	0(0,%2),%0	\n"
-:		"=r"(lockval), "+m"(*lockword)
-:		"r"(lockword)
-:		"memory");
-	return (lockval == 0);
-}
-
-#endif /* __GNUC__ */
-
-#define S_UNLOCK(lock)	(*TAS_ACTIVE_WORD(lock) = -1)
-
-#define S_INIT_LOCK(lock) \
-	do { \
-		volatile slock_t *lock_ = (lock); \
-		lock_->sema[0] = -1; \
-		lock_->sema[1] = -1; \
-		lock_->sema[2] = -1; \
-		lock_->sema[3] = -1; \
-	} while (0)
-
-#define S_LOCK_FREE(lock)	(*TAS_ACTIVE_WORD(lock) != 0)
-
-#endif	 /* __hppa || __hppa__ */
-
-
-#if defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
-/*
- * HP-UX on Itanium, non-gcc compiler
- *
- * We assume that the compiler enforces strict ordering of loads/stores on
- * volatile data (see comments on the gcc-version earlier in this file).
- * Note that this assumption does *not* hold if you use the
- * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that.
- *
- * See also Implementing Spinlocks on the Intel Itanium Architecture and
- * PA-RISC, by Tor Ekqvist and David Graves, for more information.  As of
- * this writing, version 1.0 of the manual is available at:
- * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#include <ia64/sys/inline.h>
-#define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
-/* On IA64, it's a win to use a non-locking test before the xchg proper */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-#endif	/* HPUX on IA64, non gcc */
-
-#if defined(_AIX)	/* AIX */
-/*
- * AIX (POWER)
- */
-#define HAS_TEST_AND_SET
-
-#include <sys/atomic_op.h>
-
-typedef int slock_t;
-
-#define TAS(lock)			_check_lock((slock_t *) (lock), 0, 1)
-#define S_UNLOCK(lock)		_clear_lock((slock_t *) (lock), 0)
-#endif	 /* _AIX */
-
-
-/* These are in s_lock.c */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef WIN32_ONLY_COMPILER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause instrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@postgresql.org.
-#endif
-
-
-#else	/* !HAVE_SPINLOCKS */
-
-
-/*
- * Fake spinlock implementation using semaphores --- slow and prone
- * to fall foul of kernel limits on number of semaphores, so don't use this
- * unless you must!  The subroutines appear in spin.c.
- */
-typedef PGSemaphoreData slock_t;
-
-extern bool s_lock_free_sema(volatile slock_t *lock);
-extern void s_unlock_sema(volatile slock_t *lock);
-extern void s_init_lock_sema(volatile slock_t *lock);
-extern int	tas_sema(volatile slock_t *lock);
-
-#define S_LOCK_FREE(lock)	s_lock_free_sema(lock)
-#define S_UNLOCK(lock)	 s_unlock_sema(lock)
-#define S_INIT_LOCK(lock)	s_init_lock_sema(lock)
-#define TAS(lock)	tas_sema(lock)
-
-
-#endif	/* HAVE_SPINLOCKS */
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
+typedef pg_atomic_flag slock_t;
+#define TAS(lock)			(!pg_atomic_test_set_flag(lock))
+#define TAS_SPIN(lock)		(pg_atomic_unlocked_test_flag(lock) ? 1 : TAS(lock))
+#define S_UNLOCK(lock)		pg_atomic_clear_flag(lock)
+#define S_INIT_LOCK(lock)	pg_atomic_init_flag(lock)
+#define SPIN_DELAY()		pg_spin_delay()
 #define S_LOCK(lock) \
 	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__) : 0)
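+
+/*
+ * Illustrative usage sketch, not part of this patch: the classic spinlock
+ * protocol on top of the macros above:
+ *
+ *	volatile slock_t lck;
+ *
+ *	S_INIT_LOCK(&lck);
+ *	S_LOCK(&lck);
+ *	... critical section ...
+ *	S_UNLOCK(&lck);
+ */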
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)		(*((volatile slock_t *) (lock)) = 0)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
 
 /*
  * Platform-independent out-of-line support routines
diff --git a/src/test/regress/expected/lock.out b/src/test/regress/expected/lock.out
index 0d7c3ba..fd27344 100644
--- a/src/test/regress/expected/lock.out
+++ b/src/test/regress/expected/lock.out
@@ -60,3 +60,11 @@ DROP TABLE lock_tbl2;
 DROP TABLE lock_tbl1;
 DROP SCHEMA lock_schema1 CASCADE;
 DROP ROLE regress_rol_lock1;
+-- atomic ops tests
+RESET search_path;
+SELECT test_atomic_ops();
+ test_atomic_ops 
+-----------------
+ t
+(1 row)
+
diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source
index aef1518..1fded84 100644
--- a/src/test/regress/input/create_function_1.source
+++ b/src/test/regress/input/create_function_1.source
@@ -57,6 +57,11 @@ CREATE FUNCTION make_tuple_indirect (record)
         AS '@libdir@/regress@DLSUFFIX@'
         LANGUAGE C STRICT;
 
+CREATE FUNCTION test_atomic_ops()
+    RETURNS bool
+    AS '@libdir@/regress@DLSUFFIX@'
+    LANGUAGE C;
+
 -- Things that shouldn't work:
 
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source
index 9761d12..9926c90 100644
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -51,6 +51,10 @@ CREATE FUNCTION make_tuple_indirect (record)
         RETURNS record
         AS '@libdir@/regress@DLSUFFIX@'
         LANGUAGE C STRICT;
+CREATE FUNCTION test_atomic_ops()
+    RETURNS bool
+    AS '@libdir@/regress@DLSUFFIX@'
+    LANGUAGE C;
 -- Things that shouldn't work:
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
     AS 'SELECT ''not an integer'';';
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 3bd8a15..1db996a 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -16,6 +16,7 @@
 #include "commands/trigger.h"
 #include "executor/executor.h"
 #include "executor/spi.h"
+#include "storage/atomics.h"
 #include "utils/builtins.h"
 #include "utils/geo_decls.h"
 #include "utils/rel.h"
@@ -40,6 +41,7 @@ extern int	oldstyle_length(int n, text *t);
 extern Datum int44in(PG_FUNCTION_ARGS);
 extern Datum int44out(PG_FUNCTION_ARGS);
 extern Datum make_tuple_indirect(PG_FUNCTION_ARGS);
+extern Datum test_atomic_ops(PG_FUNCTION_ARGS);
 
 #ifdef PG_MODULE_MAGIC
 PG_MODULE_MAGIC;
@@ -829,3 +831,214 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
 
 	PG_RETURN_HEAPTUPLEHEADER(newtup->t_data);
 }
+
+
+static pg_atomic_flag global_flag = PG_ATOMIC_INIT_FLAG();
+
+static void
+test_atomic_flag(void)
+{
+	pg_atomic_flag flag;
+
+	/* check the global flag */
+	if (pg_atomic_unlocked_test_flag(&global_flag))
+		elog(ERROR, "global_flag: unduly set");
+
+	if (!pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: couldn't set");
+
+	if (pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: set spuriously #1");
+
+	pg_atomic_clear_flag(&global_flag);
+
+	if (!pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: couldn't set");
+
+	/* check the local flag */
+	pg_atomic_init_flag(&flag);
+
+	if (pg_atomic_unlocked_test_flag(&flag))
+		elog(ERROR, "flag: unduly set");
+
+	if (!pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: couldn't set");
+
+	if (pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: set spuriously #2");
+
+	pg_atomic_clear_flag(&flag);
+
+	if (!pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: couldn't set");
+}
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static void
+test_atomic_uint32(void)
+{
+	pg_atomic_uint32 var;
+	uint32 expected;
+	int i;
+
+	pg_atomic_init_u32(&var, 0);
+
+	if (pg_atomic_read_u32(&var) != 0)
+		elog(ERROR, "atomic_read_u32() #1 wrong");
+
+	pg_atomic_write_u32(&var, 3);
+
+	if (pg_atomic_read_u32(&var) != 3)
+		elog(ERROR, "atomic_read_u32() #2 wrong");
+
+	if (pg_atomic_fetch_add_u32(&var, 1) != 3)
+		elog(ERROR, "atomic_fetch_add_u32() #1 wrong");
+
+	if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
+		elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");
+
+	if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
+		elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");
+
+	if (pg_atomic_add_fetch_u32(&var, 10) != 10)
+		elog(ERROR, "atomic_add_fetch_u32() #1 wrong");
+
+	if (pg_atomic_exchange_u32(&var, 5) != 10)
+		elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");
+
+	if (pg_atomic_exchange_u32(&var, 0) != 5)
+		elog(ERROR, "pg_atomic_exchange_u32() #2 wrong");
+
+	/* fail exchange because of old expected */
+	expected = 10;
+	if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
+		elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");
+
+	/* CAS is allowed to fail due to interrupts, try a couple of times */
+	for (i = 0; i < 1000; i++)
+	{
+		expected = 0;
+		if (!pg_atomic_compare_exchange_u32(&var, &expected, 1))
+			break;
+	}
+	if (i == 1000)
+		elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
+	if (pg_atomic_read_u32(&var) != 1)
+		elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");
+
+	pg_atomic_write_u32(&var, 0);
+
+	/* try setting flagbits */
+	if (pg_atomic_fetch_or_u32(&var, 1) & 1)
+		elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");
+
+	if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
+		elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");
+
+	if (pg_atomic_read_u32(&var) != 3)
+		elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");
+
+	/* try clearing flagbits */
+	if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");
+
+	if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
+			 pg_atomic_read_u32(&var));
+	/* no bits set anymore */
+	if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
+}
+#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 */
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static void
+test_atomic_uint64(void)
+{
+	pg_atomic_uint64 var;
+	uint64 expected;
+	int i;
+
+	pg_atomic_init_u64(&var, 0);
+
+	if (pg_atomic_read_u64(&var) != 0)
+		elog(ERROR, "atomic_read_u64() #1 wrong");
+
+	pg_atomic_write_u64(&var, 3);
+
+	if (pg_atomic_read_u64(&var) != 3)
+		elog(ERROR, "atomic_read_u64() #2 wrong");
+
+	if (pg_atomic_fetch_add_u64(&var, 1) != 3)
+		elog(ERROR, "atomic_fetch_add_u64() #1 wrong");
+
+	if (pg_atomic_fetch_sub_u64(&var, 1) != 4)
+		elog(ERROR, "atomic_fetch_sub_u64() #1 wrong");
+
+	if (pg_atomic_sub_fetch_u64(&var, 3) != 0)
+		elog(ERROR, "atomic_sub_fetch_u64() #1 wrong");
+
+	if (pg_atomic_add_fetch_u64(&var, 10) != 10)
+		elog(ERROR, "atomic_add_fetch_u64() #1 wrong");
+
+	if (pg_atomic_exchange_u64(&var, 5) != 10)
+		elog(ERROR, "pg_atomic_exchange_u64() #1 wrong");
+
+	if (pg_atomic_exchange_u64(&var, 0) != 5)
+		elog(ERROR, "pg_atomic_exchange_u64() #2 wrong");
+
+	/* fail exchange because of old expected */
+	expected = 10;
+	if (pg_atomic_compare_exchange_u64(&var, &expected, 1))
+		elog(ERROR, "atomic_compare_exchange_u64() changed value spuriously");
+
+	/* CAS is allowed to fail due to interrupts, try a couple of times */
+	for (i = 0; i < 100; i++)
+	{
+		expected = 0;
+		if (!pg_atomic_compare_exchange_u64(&var, &expected, 1))
+			break;
+	}
+	if (i == 100)
+		elog(ERROR, "atomic_compare_exchange_u64() never succeeded");
+	if (pg_atomic_read_u64(&var) != 1)
+		elog(ERROR, "atomic_compare_exchange_u64() didn't set value properly");
+
+	pg_atomic_write_u64(&var, 0);
+
+	/* try setting flagbits */
+	if (pg_atomic_fetch_or_u64(&var, 1) & 1)
+		elog(ERROR, "pg_atomic_fetch_or_u64() #1 wrong");
+
+	if (!(pg_atomic_fetch_or_u64(&var, 2) & 1))
+		elog(ERROR, "pg_atomic_fetch_or_u64() #2 wrong");
+
+	if (pg_atomic_read_u64(&var) != 3)
+		elog(ERROR, "invalid result after pg_atomic_fetch_or_u64()");
+
+	/* try clearing flagbits */
+	if ((pg_atomic_fetch_and_u64(&var, ~2) & 3) != 3)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #1 wrong");
+
+	if (pg_atomic_fetch_and_u64(&var, ~1) != 1)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #2 wrong: is " UINT64_FORMAT,
+			 pg_atomic_read_u64(&var));
+	/* no bits set anymore */
+	if (pg_atomic_fetch_and_u64(&var, ~0) != 0)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #3 wrong");
+}
+#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 */
+
+
+PG_FUNCTION_INFO_V1(test_atomic_ops);
+
+Datum
+test_atomic_ops(PG_FUNCTION_ARGS)
+{
+	test_atomic_flag();
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+	test_atomic_uint32();
+#endif
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+	test_atomic_uint64();
+#endif
+
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/test/regress/sql/lock.sql b/src/test/regress/sql/lock.sql
index dda212f..567e8bc 100644
--- a/src/test/regress/sql/lock.sql
+++ b/src/test/regress/sql/lock.sql
@@ -64,3 +64,8 @@ DROP TABLE lock_tbl2;
 DROP TABLE lock_tbl1;
 DROP SCHEMA lock_schema1 CASCADE;
 DROP ROLE regress_rol_lock1;
+
+
+-- atomic ops tests
+RESET search_path;
+SELECT test_atomic_ops();
-- 
1.8.5.rc1.dirty

