From 17f35a602a315fb7793e56e2905e0d09ad774524 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 18 Nov 2013 14:45:11 +0100
Subject: [PATCH 2/4] Very basic atomic ops implementation

Only gcc has been tested; stub implementations for
* msvc
* HP-UX acc
* IBM xlc
* Sun/Oracle Sunpro compiler
are included.

All implementations provide only the bare minimum of operations for
now and rely on emulating more complex operations via
compare_and_swap().
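
The emulation of e.g. fetch_add via compare_and_swap looks roughly
like this (cf. the fallbacks in atomics-generic.h):

    uint32 old;
    while (true)
    {
        old = pg_atomic_read_u32_impl(ptr);
        if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
            break;
    }
    return old;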

Most of s_lock.h is removed and replaced by use of the atomic ops.
---
 config/c-compiler.m4                             |  77 ++
 configure                                        | 321 +++++++--
 configure.in                                     |  17 +-
 src/backend/storage/lmgr/Makefile                |   2 +-
 src/backend/storage/lmgr/atomics.c               |  26 +
 src/backend/storage/lmgr/spin.c                  |   8 -
 src/include/c.h                                  |   7 +
 src/include/pg_config.h.in                       |  11 +-
 src/include/storage/atomics-arch-alpha.h         |  25 +
 src/include/storage/atomics-arch-amd64.h         |  56 ++
 src/include/storage/atomics-arch-arm.h           |  29 +
 src/include/storage/atomics-arch-hppa.h          |  17 +
 src/include/storage/atomics-arch-i386.h          |  96 +++
 src/include/storage/atomics-arch-ia64.h          |  27 +
 src/include/storage/atomics-arch-ppc.h           |  25 +
 src/include/storage/atomics-generic-acc.h        |  99 +++
 src/include/storage/atomics-generic-gcc.h        | 174 +++++
 src/include/storage/atomics-generic-msvc.h       |  83 +++
 src/include/storage/atomics-generic-sunpro.h     |  66 ++
 src/include/storage/atomics-generic-xlc.h        |  84 +++
 src/include/storage/atomics-generic.h            | 402 +++++++++++
 src/include/storage/atomics.h                    | 596 +++++++++++++++
 src/include/storage/barrier.h                    | 137 +---
 src/include/storage/s_lock.h                     | 880 +----------------------
 src/test/regress/expected/lock.out               |   8 +
 src/test/regress/input/create_function_1.source  |   5 +
 src/test/regress/output/create_function_1.source |   4 +
 src/test/regress/regress.c                       | 213 ++++++
 src/test/regress/sql/lock.sql                    |   5 +
 29 files changed, 2412 insertions(+), 1088 deletions(-)
 create mode 100644 src/backend/storage/lmgr/atomics.c
 create mode 100644 src/include/storage/atomics-arch-alpha.h
 create mode 100644 src/include/storage/atomics-arch-amd64.h
 create mode 100644 src/include/storage/atomics-arch-arm.h
 create mode 100644 src/include/storage/atomics-arch-hppa.h
 create mode 100644 src/include/storage/atomics-arch-i386.h
 create mode 100644 src/include/storage/atomics-arch-ia64.h
 create mode 100644 src/include/storage/atomics-arch-ppc.h
 create mode 100644 src/include/storage/atomics-generic-acc.h
 create mode 100644 src/include/storage/atomics-generic-gcc.h
 create mode 100644 src/include/storage/atomics-generic-msvc.h
 create mode 100644 src/include/storage/atomics-generic-sunpro.h
 create mode 100644 src/include/storage/atomics-generic-xlc.h
 create mode 100644 src/include/storage/atomics-generic.h
 create mode 100644 src/include/storage/atomics.h

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 4ba3236..c24cb59 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -289,3 +289,80 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_PROG_CC_LDFLAGS_OPT
+
+AC_DEFUN([PGAC_C_BUILTIN_CONSTANT_P],
+[AC_CACHE_CHECK(for __builtin_constant_p, pgac_cv__builtin_constant_p,
+[AC_TRY_COMPILE([static int x; static int y[__builtin_constant_p(x) ? x : 1];],
+[],
+[pgac_cv__builtin_constant_p=yes],
+[pgac_cv__builtin_constant_p=no])])
+if test x"$pgac_cv__builtin_constant_p" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CONSTANT_P, 1,
+          [Define to 1 if your compiler understands __builtin_constant_p.])
+fi])# PGAC_C_BUILTIN_CONSTANT_P
+
+# PGAC_HAVE_GCC__SYNC_INT32_TAS
+# -------------------------
+# Check if the C compiler understands __sync_lock_test_and_set(),
+# and define HAVE_GCC__SYNC_INT32_TAS if so.
+#
+# NB: There are platforms where test_and_set is available but compare_and_swap
+# is not, so test this separately.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_TAS],
+[AC_CACHE_CHECK(for builtin locking functions, pgac_cv_gcc_sync_int32_tas,
+[AC_TRY_LINK([],
+  [int lock = 0;
+   __sync_lock_test_and_set(&lock, 1);
+   __sync_lock_release(&lock);],
+  [pgac_cv_gcc_sync_int32_tas="yes"],
+  [pgac_cv_gcc_sync_int32_tas="no"])])
+if test x"$pgac_cv_gcc_sync_int32_tas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT32_TAS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
+fi])# PGAC_HAVE_GCC__SYNC_INT32_TAS
+
+# PGAC_HAVE_GCC__SYNC_INT32_CAS
+# -------------------------
+# Check if the C compiler understands __sync_val_compare_and_swap() for 32bit
+# types, and define HAVE_GCC__SYNC_INT32_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_CAS],
+[AC_CACHE_CHECK(for builtin __sync int32 atomic operations, pgac_cv_gcc_sync_int32_cas,
+[AC_TRY_LINK([],
+  [int val = 0;
+   __sync_val_compare_and_swap(&val, 0, 37);],
+  [pgac_cv_gcc_sync_int32_cas="yes"],
+  [pgac_cv_gcc_sync_int32_cas="no"])])
+if test x"$pgac_cv_gcc_sync_int32_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT32_CAS, 1, [Define to 1 if you have __sync_val_compare_and_swap(int *, int, int).])
+fi])# PGAC_HAVE_GCC__SYNC_INT32_CAS
+
+# PGAC_HAVE_GCC__SYNC_INT64_CAS
+# -------------------------
+# Check if the C compiler understands __sync_val_compare_and_swap() for 64bit
+# types, and define HAVE_GCC__SYNC_INT64_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT64_CAS],
+[AC_CACHE_CHECK(for builtin __sync int64 atomic operations, pgac_cv_gcc_sync_int64_cas,
+[AC_TRY_LINK([],
+  [PG_INT64_TYPE lock = 0;
+   __sync_val_compare_and_swap(&lock, 0, (PG_INT64_TYPE) 37);],
+  [pgac_cv_gcc_sync_int64_cas="yes"],
+  [pgac_cv_gcc_sync_int64_cas="no"])])
+if test x"$pgac_cv_gcc_sync_int64_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__SYNC_INT64_CAS, 1, [Define to 1 if you have __sync_val_compare_and_swap(int64 *, int64, int64).])
+fi])# PGAC_HAVE_GCC__SYNC_INT64_CAS
+
+# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
+# -------------------------
+# Check if the C compiler understands __atomic_compare_exchange_n() for 32bit
+# types, and define HAVE_GCC__ATOMIC_INT32_CAS if so.
+AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT32_CAS],
+[AC_CACHE_CHECK(for builtin __atomic int32 atomic operations, pgac_cv_gcc_atomic_int32_cas,
+[AC_TRY_LINK([],
+  [int val = 0;
+   int expect = 0;
+   __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);],
+  [pgac_cv_gcc_atomic_int32_cas="yes"],
+  [pgac_cv_gcc_atomic_int32_cas="no"])])
+if test x"$pgac_cv_gcc_atomic_int32_cas" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__ATOMIC_INT32_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int).])
+fi])# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
diff --git a/configure b/configure
index 74e34eb..04cd0ec 100755
--- a/configure
+++ b/configure
@@ -22984,71 +22984,6 @@ fi
 done
 
 
-{ $as_echo "$as_me:$LINENO: checking for builtin locking functions" >&5
-$as_echo_n "checking for builtin locking functions... " >&6; }
-if test "${pgac_cv_gcc_int_atomics+set}" = set; then
-  $as_echo_n "(cached) " >&6
-else
-  cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-int
-main ()
-{
-int lock = 0;
-   __sync_lock_test_and_set(&lock, 1);
-   __sync_lock_release(&lock);
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
-$as_echo "$ac_try_echo") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && {
-	 test -z "$ac_c_werror_flag" ||
-	 test ! -s conftest.err
-       } && test -s conftest$ac_exeext && {
-	 test "$cross_compiling" = yes ||
-	 $as_test_x conftest$ac_exeext
-       }; then
-  pgac_cv_gcc_int_atomics="yes"
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-	pgac_cv_gcc_int_atomics="no"
-fi
-
-rm -rf conftest.dSYM
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
-      conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:$LINENO: result: $pgac_cv_gcc_int_atomics" >&5
-$as_echo "$pgac_cv_gcc_int_atomics" >&6; }
-if test x"$pgac_cv_gcc_int_atomics" = x"yes"; then
-
-cat >>confdefs.h <<\_ACEOF
-#define HAVE_GCC_INT_ATOMICS 1
-_ACEOF
-
-fi
-
 # Lastly, restore full LIBS list and check for readline/libedit symbols
 LIBS="$LIBS_including_readline"
 
@@ -28815,6 +28750,262 @@ _ACEOF
 fi
 
 
+# Check for various atomic operations now that we have checked how to declare
+# 64bit integers.
+{ $as_echo "$as_me:$LINENO: checking for builtin locking functions" >&5
+$as_echo_n "checking for builtin locking functions... " >&6; }
+if test "${pgac_cv_gcc_sync_int32_tas+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+int lock = 0;
+   __sync_lock_test_and_set(&lock, 1);
+   __sync_lock_release(&lock);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  pgac_cv_gcc_sync_int32_tas="yes"
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_gcc_sync_int32_tas="no"
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_gcc_sync_int32_tas" >&5
+$as_echo "$pgac_cv_gcc_sync_int32_tas" >&6; }
+if test x"$pgac_cv_gcc_sync_int32_tas" = x"yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GCC__SYNC_INT32_TAS 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:$LINENO: checking for builtin __sync int32 atomic operations" >&5
+$as_echo_n "checking for builtin __sync int32 atomic operations... " >&6; }
+if test "${pgac_cv_gcc_sync_int32_cas+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+int val = 0;
+   __sync_val_compare_and_swap(&val, 0, 37);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  pgac_cv_gcc_sync_int32_cas="yes"
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_gcc_sync_int32_cas="no"
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_gcc_sync_int32_cas" >&5
+$as_echo "$pgac_cv_gcc_sync_int32_cas" >&6; }
+if test x"$pgac_cv_gcc_sync_int32_cas" = x"yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GCC__SYNC_INT32_CAS 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:$LINENO: checking for builtin __sync int64 atomic operations" >&5
+$as_echo_n "checking for builtin __sync int64 atomic operations... " >&6; }
+if test "${pgac_cv_gcc_sync_int64_cas+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+PG_INT64_TYPE lock = 0;
+   __sync_val_compare_and_swap(&lock, 0, (PG_INT64_TYPE) 37);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  pgac_cv_gcc_sync_int64_cas="yes"
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_gcc_sync_int64_cas="no"
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_gcc_sync_int64_cas" >&5
+$as_echo "$pgac_cv_gcc_sync_int64_cas" >&6; }
+if test x"$pgac_cv_gcc_sync_int64_cas" = x"yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GCC__SYNC_INT64_CAS 1
+_ACEOF
+
+fi
+{ $as_echo "$as_me:$LINENO: checking for builtin __atomic int32 atomic operations" >&5
+$as_echo_n "checking for builtin __atomic int32 atomic operations... " >&6; }
+if test "${pgac_cv_gcc_atomic_int32_cas+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+int val = 0;
+   int expect = 0;
+   __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  pgac_cv_gcc_atomic_int32_cas="yes"
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	pgac_cv_gcc_atomic_int32_cas="no"
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_gcc_atomic_int32_cas" >&5
+$as_echo "$pgac_cv_gcc_atomic_int32_cas" >&6; }
+if test x"$pgac_cv_gcc_atomic_int32_cas" = x"yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GCC__ATOMIC_INT32_CAS 1
+_ACEOF
+
+fi
 
 if test "$PORTNAME" != "win32"
 then
diff --git a/configure.in b/configure.in
index 304399e..e9d8309 100644
--- a/configure.in
+++ b/configure.in
@@ -1453,17 +1453,6 @@ fi
 AC_CHECK_FUNCS([strtoll strtoq], [break])
 AC_CHECK_FUNCS([strtoull strtouq], [break])
 
-AC_CACHE_CHECK([for builtin locking functions], pgac_cv_gcc_int_atomics,
-[AC_TRY_LINK([],
-  [int lock = 0;
-   __sync_lock_test_and_set(&lock, 1);
-   __sync_lock_release(&lock);],
-  [pgac_cv_gcc_int_atomics="yes"],
-  [pgac_cv_gcc_int_atomics="no"])])
-if test x"$pgac_cv_gcc_int_atomics" = x"yes"; then
-  AC_DEFINE(HAVE_GCC_INT_ATOMICS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
-fi
-
 # Lastly, restore full LIBS list and check for readline/libedit symbols
 LIBS="$LIBS_including_readline"
 
@@ -1738,6 +1727,12 @@ AC_CHECK_TYPES([int8, uint8, int64, uint64], [], [],
 # C, but is missing on some old platforms.
 AC_CHECK_TYPES(sig_atomic_t, [], [], [#include <signal.h>])
 
+# Check for various atomic operations now that we have checked how to declare
+# 64bit integers.
+PGAC_HAVE_GCC__SYNC_INT32_TAS
+PGAC_HAVE_GCC__SYNC_INT32_CAS
+PGAC_HAVE_GCC__SYNC_INT64_CAS
+PGAC_HAVE_GCC__ATOMIC_INT32_CAS
 
 if test "$PORTNAME" != "win32"
 then
diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile
index e12a854..be95d50 100644
--- a/src/backend/storage/lmgr/Makefile
+++ b/src/backend/storage/lmgr/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/storage/lmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
+OBJS = atomics.o lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/storage/lmgr/atomics.c b/src/backend/storage/lmgr/atomics.c
new file mode 100644
index 0000000..af95153
--- /dev/null
+++ b/src/backend/storage/lmgr/atomics.c
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.c
+ *	   Non-inline parts of the atomics implementation
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/storage/lmgr/atomics.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+/*
+ * We want the functions below to be inline; but if the compiler doesn't
+ * support that, fall back on providing them as regular functions.	See
+ * STATIC_IF_INLINE in c.h.
+ */
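+/*
+ * I.e. with PG_USE_INLINE the atomics headers emit "static inline"
+ * definitions into every including translation unit; without it they only
+ * emit declarations, and this file provides the single set of external
+ * definitions by defining ATOMICS_INCLUDE_DEFINITIONS before including
+ * atomics.h.
+ */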
+#define ATOMICS_INCLUDE_DEFINITIONS
+
+#include "storage/atomics.h"
+
+#ifdef PG_USE_ATOMICS_EMULATION
+#endif
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index f054be8..469dd84 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -86,14 +86,6 @@ s_unlock_sema(volatile slock_t *lock)
 	PGSemaphoreUnlock((PGSemaphore) lock);
 }
 
-bool
-s_lock_free_sema(volatile slock_t *lock)
-{
-	/* We don't currently use S_LOCK_FREE anyway */
-	elog(ERROR, "spin.c does not support S_LOCK_FREE()");
-	return false;
-}
-
 int
 tas_sema(volatile slock_t *lock)
 {
diff --git a/src/include/c.h b/src/include/c.h
index 6e19c6d..347f394 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -866,6 +866,13 @@ typedef NameData *Name;
 #define STATIC_IF_INLINE
 #endif   /* PG_USE_INLINE */
 
+#ifdef PG_USE_INLINE
+#define STATIC_IF_INLINE_DECLARE static inline
+#else
+#define STATIC_IF_INLINE_DECLARE extern
+#endif   /* PG_USE_INLINE */
+
+
 /* ----------------------------------------------------------------
  *				Section 8:	random stuff
  * ----------------------------------------------------------------
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 5eac52d..3dd76a2 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -173,8 +173,17 @@
 /* Define to 1 if your compiler understands __FUNCTION__. */
 #undef HAVE_FUNCNAME__FUNCTION
 
+/* Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int). */
+#undef HAVE_GCC__ATOMIC_INT32_CAS
+
+/* Define to 1 if you have __sync_val_compare_and_swap(int *, int, int). */
+#undef HAVE_GCC__SYNC_INT32_CAS
+
 /* Define to 1 if you have __sync_lock_test_and_set(int *) and friends. */
-#undef HAVE_GCC_INT_ATOMICS
+#undef HAVE_GCC__SYNC_INT32_TAS
+
+/* Define to 1 if you have __sync_val_compare_and_swap(int64 *, int64, int64).
+   */
+#undef HAVE_GCC__SYNC_INT64_CAS
 
 /* Define to 1 if you have the `getaddrinfo' function. */
 #undef HAVE_GETADDRINFO
diff --git a/src/include/storage/atomics-arch-alpha.h b/src/include/storage/atomics-arch-alpha.h
new file mode 100644
index 0000000..06b7125
--- /dev/null
+++ b/src/include/storage/atomics-arch-alpha.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-alpha.h
+ *	  Atomic operations considerations specific to Alpha
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-alpha.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+/*
+ * Unlike all other known architectures, Alpha allows dependent reads to be
+ * reordered, but we don't currently find it necessary to provide a conditional
+ * read barrier to cover that case.  We might need to add that later.
+ */
+#define pg_memory_barrier_impl()	__asm__ __volatile__ ("mb" : : : "memory")
+#define pg_read_barrier_impl()		__asm__ __volatile__ ("rmb" : : : "memory")
+#define pg_write_barrier_impl()		__asm__ __volatile__ ("wmb" : : : "memory")
+#endif
diff --git a/src/include/storage/atomics-arch-amd64.h b/src/include/storage/atomics-arch-amd64.h
new file mode 100644
index 0000000..90f4abc
--- /dev/null
+++ b/src/include/storage/atomics-arch-amd64.h
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-amd64.h
+ *	  Atomic operations considerations specific to intel/amd x86-64
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-amd64.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * x86_64 has similar ordering characteristics to i386.
+ *
+ * Technically, some x86-ish chips support uncached memory access and/or
+ * special instructions that are weakly ordered.  In those cases we'd need
+ * the read and write barriers to be lfence and sfence.  But since we don't
+ * do those things, a compiler barrier should be enough.
+ */
+#if defined(__INTEL_COMPILER)
+#	define pg_memory_barrier_impl()		_mm_mfence()
+#elif defined(__GNUC__)
+#	define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
+#endif
+
+#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
+#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
+
+#if defined(__GNUC__)
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+static __inline__ void
+pg_spin_delay_impl(void)
+{
+	/*
+	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
+	 * Opteron, but it may be of some use on EM64T, so we keep it.
+	 */
+	__asm__ __volatile__(
+		" rep; nop			\n");
+}
+#endif
+
+#elif defined(WIN32_ONLY_COMPILER)
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	_mm_pause();
+}
+#endif
diff --git a/src/include/storage/atomics-arch-arm.h b/src/include/storage/atomics-arch-arm.h
new file mode 100644
index 0000000..a00f8f5
--- /dev/null
+++ b/src/include/storage/atomics-arch-arm.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-arm.h
+ *	  Atomic operations considerations specific to ARM
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-arm.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+
+/*
+ * FIXME: Disable 32bit atomics for ARMs before v6 or error out
+ */
+
+/*
+ * 64 bit atomics on ARM are implemented using kernel fallbacks and might be
+ * slow, so disable them entirely for now.
+ */
+#define PG_DISABLE_64_BIT_ATOMICS
diff --git a/src/include/storage/atomics-arch-hppa.h b/src/include/storage/atomics-arch-hppa.h
new file mode 100644
index 0000000..8913801
--- /dev/null
+++ b/src/include/storage/atomics-arch-hppa.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-hppa.h
+ *	  Atomic operations considerations specific to HPPA
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-hppa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* HPPA doesn't do either read or write reordering */
+#define pg_memory_barrier_impl()		pg_compiler_barrier_impl()
diff --git a/src/include/storage/atomics-arch-i386.h b/src/include/storage/atomics-arch-i386.h
new file mode 100644
index 0000000..11d1b1e
--- /dev/null
+++ b/src/include/storage/atomics-arch-i386.h
@@ -0,0 +1,96 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-i386.h
+ *	  Atomic operations considerations specific to intel x86
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-i386.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * i386 does not allow loads to be reordered with other loads, or stores to be
+ * reordered with other stores, but a load can be performed before a subsequent
+ * store.
+ *
+ * "lock; addl" has worked for longer than "mfence".
+ */
+
+#if defined(__INTEL_COMPILER)
+#define pg_memory_barrier_impl()		_mm_mfence()
+#elif defined(__GNUC__)
+#define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#endif
+
+#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
+#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
+
+#if defined(__GNUC__)
+
+/*
+ * Implement TAS if not available as a __sync builtin on very old gcc
+ * versions.
+ */
+#ifndef HAVE_GCC__SYNC_INT32_TAS
+static __inline__ int
+tas(volatile slock_t *lock)
+{
+	register slock_t _res = 1;
+
+	__asm__ __volatile__(
+		"	lock			\n"
+		"	xchgb	%0,%1	\n"
+:		"+q"(_res), "+m"(*lock)
+:
+:		"memory", "cc");
+	return (int) _res;
+}
+#endif
+
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+static __inline__ void
+pg_spin_delay_impl(void)
+{
+	/*
+	 * This sequence is equivalent to the PAUSE instruction ("rep" is
+	 * ignored by old IA32 processors if the following instruction is
+	 * not a string operation); the IA-32 Architecture Software
+	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
+	 * PAUSE in the inner loop of a spin lock is necessary for good
+	 * performance:
+	 *
+	 *     The PAUSE instruction improves the performance of IA-32
+	 *     processors supporting Hyper-Threading Technology when
+	 *     executing spin-wait loops and other routines where one
+	 *     thread is accessing a shared lock or semaphore in a tight
+	 *     polling loop. When executing a spin-wait loop, the
+	 *     processor can suffer a severe performance penalty when
+	 *     exiting the loop because it detects a possible memory order
+	 *     violation and flushes the core processor's pipeline. The
+	 *     PAUSE instruction provides a hint to the processor that the
+	 *     code sequence is a spin-wait loop. The processor uses this
+	 *     hint to avoid the memory order violation and prevent the
+	 *     pipeline flush. In addition, the PAUSE instruction
+	 *     de-pipelines the spin-wait loop to prevent it from
+	 *     consuming execution resources excessively.
+	 */
+	__asm__ __volatile__(
+		" rep; nop			\n");
+}
+#endif
+
+#elif defined(WIN32_ONLY_COMPILER)
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	/* See comment for gcc code. Same code, MASM syntax */
+	__asm rep nop;
+}
+#endif
diff --git a/src/include/storage/atomics-arch-ia64.h b/src/include/storage/atomics-arch-ia64.h
new file mode 100644
index 0000000..2863431
--- /dev/null
+++ b/src/include/storage/atomics-arch-ia64.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-ia64.h
+ *	  Atomic operations considerations specific to intel itanium
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-ia64.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Itanium is weakly ordered, so read and write barriers require a full
+ * fence.
+ */
+#if defined(__INTEL_COMPILER)
+#	define pg_memory_barrier_impl()		__mf()
+#elif defined(__GNUC__)
+#	define pg_memory_barrier_impl()		__asm__ __volatile__ ("mf" : : : "memory")
+#elif defined(__hpux)
+#	define pg_compiler_barrier_impl()	_Asm_sched_fence()
+#	define pg_memory_barrier_impl()		_Asm_mf()
+#endif
diff --git a/src/include/storage/atomics-arch-ppc.h b/src/include/storage/atomics-arch-ppc.h
new file mode 100644
index 0000000..f785c99
--- /dev/null
+++ b/src/include/storage/atomics-arch-ppc.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-arch-ppc.h
+ *	  Atomic operations considerations specific to PowerPC
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/storage/atomics-arch-ppc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#endif
diff --git a/src/include/storage/atomics-generic-acc.h b/src/include/storage/atomics-generic-acc.h
new file mode 100644
index 0000000..b1c9200
--- /dev/null
+++ b/src/include/storage/atomics-generic-acc.h
@@ -0,0 +1,99 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-acc.h
+ *	  Atomic operations support when using HP's acc on HP-UX
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * inline assembly for Itanium-based HP-UX:
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/Itanium/inline_assem_ERS.pdf
+ * * Implementing Spinlocks on the Intel® Itanium® Architecture and PA-RISC
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
+ *
+ * Itanium only supports a small set of numbers (-16, -8, -4, -1, 1, 4, 8, 16)
+ * for atomic add/sub, so we just implement everything but compare_exchange via
+ * the compare_exchange fallbacks in atomics-generic.h.
+ *
+ * src/include/storage/atomics-generic-acc.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <machine/sys/inline.h>
+
+/* IA64 always has 32/64 bit atomics */
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#define MINOR_FENCE (_Asm_fence) (_UP_CALL_FENCE | _UP_SYS_FENCE | \
+								 _DOWN_CALL_FENCE | _DOWN_SYS_FENCE )
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	/*
+	 * We want a barrier, not just release/acquire semantics.
+	 */
+	_Asm_mf();
+	/*
+	 * Notes:
+	 * _DOWN_MEM_FENCE | _UP_MEM_FENCE prevents reordering by the compiler
+	 */
+	current =  _Asm_cmpxchg(_SZ_W, /* word */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	_Asm_mf();
+	current =  _Asm_cmpxchg(_SZ_D, /* doubleword */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#undef MINOR_FENCE
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-gcc.h b/src/include/storage/atomics-generic-gcc.h
new file mode 100644
index 0000000..3675b9d
--- /dev/null
+++ b/src/include/storage/atomics-generic-gcc.h
@@ -0,0 +1,174 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-gcc.h
+ *	  Atomic operations, implemented using gcc (or compatible) intrinsics.
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Legacy __sync Built-in Functions for Atomic Memory Access
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html
+ * * Built-in functions for memory model aware atomic operations
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html
+ *
+ * src/include/storage/atomics-generic-gcc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+#define pg_compiler_barrier_impl()	__asm__ __volatile__("" ::: "memory")
+
+/*
+ * If we're on GCC 4.1.0 or higher, we should be able to get a memory
+ * barrier out of this compiler built-in.  But we prefer to rely on our
+ * own definitions where possible, and use this only as a fallback.
+ */
+#if !defined(pg_memory_barrier_impl)
+#	if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+#		define pg_memory_barrier_impl()		__atomic_thread_fence(__ATOMIC_SEQ_CST)
+#	elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+#		define pg_memory_barrier_impl()		__sync_synchronize()
+#	endif
+#endif /* !defined(pg_memory_barrier_impl) */
+
+#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
+#		define pg_read_barrier_impl()		__atomic_thread_fence(__ATOMIC_ACQUIRE)
+#endif
+
+#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
+#		define pg_write_barrier_impl()		__atomic_thread_fence(__ATOMIC_RELEASE)
+#endif
+
+#ifdef HAVE_GCC__SYNC_INT32_TAS
+typedef struct pg_atomic_flag
+{
+	volatile uint32 value;
+} pg_atomic_flag;
+
+#define PG_ATOMIC_INIT_FLAG() {0}
+#endif
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#	define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+#endif /* HAVE_GCC__ATOMIC_INT32_CAS || HAVE_GCC__SYNC_INT64_CAS */
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#if !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(HAVE_GCC__SYNC_INT32_TAS)
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	uint32 ret;
+	/* XXX: TAS is only an acquire barrier; issue a full one for now */
+	pg_memory_barrier_impl();
+	/* some platforms only support storing a 1 here */
+	ret = __sync_lock_test_and_set(&ptr->value, 1);
+	return ret == 0;
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return ptr->value;
+}
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: only a release barrier, make it a full one for now */
+	pg_memory_barrier_impl();
+	__sync_lock_release(&ptr->value);
+}
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+#endif /* !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(HAVE_GCC__SYNC_INT32_TAS) */
+
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#else
+#	error "strange configuration"
+#endif
+
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)
+
+/* if __atomic is supported for 32 it's also supported for 64bit */
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#else
+#	error "strange configuration"
+#endif
+
+#endif /* HAVE_GCC__ATOMIC_INT32_CAS || HAVE_GCC__SYNC_INT64_CAS */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-msvc.h b/src/include/storage/atomics-generic-msvc.h
new file mode 100644
index 0000000..baead62
--- /dev/null
+++ b/src/include/storage/atomics-generic-msvc.h
@@ -0,0 +1,83 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-msvc.h
+ *	  Atomic operations support when using MSVC
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Interlocked Variable Access
+ *   http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx
+ *
+ * src/include/storage/atomics-generic-msvc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <intrin.h>
+#include <windows.h>
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+/* Should work on both MSVC and Borland. */
+#pragma intrinsic(_ReadWriteBarrier)
+#define pg_compiler_barrier_impl()	_ReadWriteBarrier()
+
+#ifndef pg_memory_barrier_impl
+#define pg_memory_barrier_impl()	MemoryBarrier()
+#endif
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = InterlockedCompareExchange(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+/*
+ * The non-intrinsic version is only available on Vista and upwards, so use
+ * the intrinsic version. It is only supported on >486 processors, but we
+ * require XP as a minimum baseline, which doesn't run on a 486, so we don't
+ * need to add checks for that case.
+ */
+#pragma intrinsic(_InterlockedCompareExchange64)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = _InterlockedCompareExchange64(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-sunpro.h b/src/include/storage/atomics-generic-sunpro.h
new file mode 100644
index 0000000..d512409
--- /dev/null
+++ b/src/include/storage/atomics-generic-sunpro.h
@@ -0,0 +1,66 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-sunpro.h
+ *	  Atomic operations for Solaris' CC
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * manpage for atomic_cas(3C)
+ *   http://www.unix.com/man-page/opensolaris/3c/atomic_cas/
+ *   http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html
+ *
+ * src/include/storage/atomics-generic-sunpro.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <atomic.h>
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	current = atomic_cas_32(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	current = atomic_cas_64(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic-xlc.h b/src/include/storage/atomics-generic-xlc.h
new file mode 100644
index 0000000..6a47fe2
--- /dev/null
+++ b/src/include/storage/atomics-generic-xlc.h
@@ -0,0 +1,84 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic-xlc.h
+ *	  Atomic operations for IBM's xlc
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Synchronization and atomic built-in functions
+ *   http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/topic/com.ibm.xlcpp8l.doc/compiler/ref/bif_sync.htm
+ *
+ * src/include/storage/atomics-generic-xlc.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <atomic.h>
+
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+typedef pg_atomic_uint32 pg_atomic_flag;
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+/* declare outside the defined(PG_USE_INLINE) for visibility */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+
+/* FIXME: check for 64bit mode, only supported there */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+
+	/*
+	 * xlc's documentation tells us:
+	 * "If __compare_and_swap is used as a locking primitive, insert a call to
+	 * the __isync built-in function at the start of any critical sections."
+	 */
+	__isync();
+
+	/*
+	 * XXX: __compare_and_swap is defined to take signed parameters, but that
+	 * shouldn't matter since we don't perform any arithmetic operations.
+	 * __compare_and_swap returns whether the swap took place and stores the
+	 * value found at the target through its second argument, matching the
+	 * semantics needed for *expected.
+	 */
+	ret = __compare_and_swap((volatile int *) &ptr->value,
+							 (int *) expected, (int) newval);
+	return ret;
+}
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+
+	__isync();
+
+	ret = __compare_and_swaplp((volatile long *) &ptr->value,
+							   (long *) expected, (long) newval);
+	return ret;
+}
+#endif /* defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics-generic.h b/src/include/storage/atomics-generic.h
new file mode 100644
index 0000000..6c0858d
--- /dev/null
+++ b/src/include/storage/atomics-generic.h
@@ -0,0 +1,402 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics-generic.h
+ *	  Atomic operations.
+ *
+ * Portions Copyright (c) 2013, PostgreSQL Global Development Group
+ *
+ * src/include/storage/atomics-generic.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#	error "should be included via atomics.h"
+#endif
+
+/* Need a way to implement spinlocks */
+#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && \
+	!defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && \
+	!defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#	error "postgres requires atomic ops for locking to be provided"
+#endif
+
+#ifndef pg_memory_barrier_impl
+#	error "postgres requires a memory barrier implementation"
+#endif
+
+#ifndef pg_compiler_barrier_impl
+#	error "postgres requires a compiler barrier implementation"
+#endif
+
+/*
+ * If read or write barriers are undefined, we upgrade them to full memory
+ * barriers.
+ */
+#if !defined(pg_read_barrier_impl)
+#	define pg_read_barrier_impl pg_memory_barrier_impl
+#endif
+#if !defined(pg_write_barrier_impl)
+#	define pg_write_barrier_impl pg_memory_barrier_impl
+#endif
+
+#ifndef PG_HAS_SPIN_DELAY
+#define PG_HAS_SPIN_DELAY
+#define pg_spin_delay_impl()	((void)0)
+#endif
+
+
+/* provide fallback */
+#ifndef PG_HAS_ATOMIC_TEST_SET_FLAG
+typedef pg_atomic_uint32 pg_atomic_flag;
+#define PG_ATOMIC_INIT_FLAG() {0}
+#endif
+
+#if !defined(PG_DISABLE_64_BIT_ATOMICS) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#	define PG_HAVE_64_BIT_ATOMICS
+#endif
+
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+#ifndef PG_HAS_ATOMIC_READ_U32
+#define PG_HAS_ATOMIC_READ_U32
+STATIC_IF_INLINE uint32
+pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr)
+{
+	return ptr->value;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_WRITE_U32
+#define PG_HAS_ATOMIC_WRITE_U32
+STATIC_IF_INLINE void
+pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	ptr->value = val;
+}
+#endif
+
+/*
+ * provide fallback for test_and_set using atomic_exchange if available
+ */
+#if !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32)
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_exchange_u32_impl(ptr, 1) == 0;
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return (bool) pg_atomic_read_u32_impl(ptr);
+}
+
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier_impl();
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+/*
+ * provide fallback for test_and_set using atomic_compare_exchange if
+ * available.
+ */
+#elif !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+
+#define PG_HAS_ATOMIC_INIT_FLAG
+STATIC_IF_INLINE void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAS_ATOMIC_TEST_SET_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	uint32 value = 0;
+	return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1);
+}
+
+#define PG_HAS_ATOMIC_UNLOCKED_TEST_FLAG
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return (bool) pg_atomic_read_u32_impl(ptr);
+}
+
+#define PG_HAS_ATOMIC_CLEAR_FLAG
+STATIC_IF_INLINE void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier_impl();
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#elif !defined(PG_HAS_ATOMIC_TEST_SET_FLAG)
+#	error "No pg_atomic_test_and_set provided"
+#endif /* !defined(PG_HAS_ATOMIC_TEST_SET_FLAG) */
+
+
+#ifndef PG_HAS_ATOMIC_INIT_U32
+#define PG_HAS_ATOMIC_INIT_U32
+STATIC_IF_INLINE void
+pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
+{
+	pg_atomic_write_u32_impl(ptr, val_);
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_EXCHANGE_U32
+#define PG_HAS_ATOMIC_EXCHANGE_U32
+STATIC_IF_INLINE uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_U32
+#define PG_HAS_ATOMIC_FETCH_ADD_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_SUB_U32
+#define PG_HAS_ATOMIC_FETCH_SUB_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old - sub_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_AND_U32
+#define PG_HAS_ATOMIC_FETCH_AND_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_OR_U32
+#define PG_HAS_ATOMIC_FETCH_OR_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	uint32 old;
+	while (true)
+	{
+		old = pg_atomic_read_u32_impl(ptr);
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_ADD_FETCH_U32
+#define PG_HAS_ATOMIC_ADD_FETCH_U32
+STATIC_IF_INLINE uint32
+pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_SUB_FETCH_U32
+#define PG_HAS_ATOMIC_SUB_FETCH_U32
+STATIC_IF_INLINE uint32
+pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_UNTIL_U32
+#define PG_HAS_ATOMIC_FETCH_ADD_UNTIL_U32
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_until_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 add_, uint32 until)
+{
+	uint32 old;
+	while (true)
+	{
+		uint32 new_val;
+		old = pg_atomic_read_u32_impl(ptr);
+		if (old >= until)
+			break;
+
+		new_val = old + add_;
+		if (new_val > until)
+			new_val = until;
+
+		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, new_val))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifdef PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+
+#ifndef PG_HAS_ATOMIC_READ_U64
+#define PG_HAS_ATOMIC_READ_U64
+STATIC_IF_INLINE uint64
+pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr)
+{
+	return ptr->value;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_WRITE_U64
+#define PG_HAS_ATOMIC_WRITE_U64
+STATIC_IF_INLINE void
+pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	ptr->value = val;
+}
+#endif
+
+/* must come after write_u64, which it uses */
+#ifndef PG_HAS_ATOMIC_INIT_U64
+#define PG_HAS_ATOMIC_INIT_U64
+STATIC_IF_INLINE void
+pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
+{
+	pg_atomic_write_u64_impl(ptr, val_);
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_ADD_U64
+#define PG_HAS_ATOMIC_FETCH_ADD_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_SUB_U64
+#define PG_HAS_ATOMIC_FETCH_SUB_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old - sub_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_AND_U64
+#define PG_HAS_ATOMIC_FETCH_AND_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_FETCH_OR_U64
+#define PG_HAS_ATOMIC_FETCH_OR_U64
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	uint64 old;
+	while (true)
+	{
+		old = pg_atomic_read_u64_impl(ptr);
+		if (pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
+			break;
+	}
+	return old;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_ADD_FETCH_U64
+#define PG_HAS_ATOMIC_ADD_FETCH_U64
+STATIC_IF_INLINE uint64
+pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_;
+}
+#endif
+
+#ifndef PG_HAS_ATOMIC_SUB_FETCH_U64
+#define PG_HAS_ATOMIC_SUB_FETCH_U64
+STATIC_IF_INLINE uint64
+pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_;
+}
+#endif
+
+#endif /* PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
diff --git a/src/include/storage/atomics.h b/src/include/storage/atomics.h
new file mode 100644
index 0000000..d16d6eb
--- /dev/null
+++ b/src/include/storage/atomics.h
@@ -0,0 +1,596 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.h
+ *	  Generic atomic operations support.
+ *
+ * Hardware and compiler dependent functions for manipulating memory
+ * atomically and dealing with cache coherency. Used to implement locking
+ * facilities and other concurrency safe constructs.
+ *
+ * There are three data types which can be manipulated atomically:
+ *
+ * * pg_atomic_flag - supports atomic test/clear operations, useful for
+ *      implementing spinlocks and similar constructs.
+ *
+ * * pg_atomic_uint32 - unsigned 32bit integer that can be manipulated
+ *      correctly by several processes at the same time.
+ *
+ * * pg_atomic_uint64 - optional 64bit variant of pg_atomic_uint32. Support
+ *      can be tested by checking for PG_HAVE_64_BIT_ATOMICS.
+ *
+ * The values stored in these types cannot be accessed directly; all access
+ * has to go through the API provided in this file, i.e. it is not possible
+ * to write statements like `var += 10`. Instead, for this example, one
+ * would have to use `pg_atomic_fetch_add_u32(&var, 10)`.
+ *
+ * This restriction has several reasons: Primarily it prevents non-atomic
+ * math from being performed on these values, which wouldn't be concurrency
+ * safe. Secondly it allows implementing these types on top of facilities -
+ * like C11's atomics - that don't provide direct access. Lastly it serves as
+ * a useful hint about which code should be looked at more carefully because
+ * of concurrency concerns.
+ *
+ * To be useful, these variables usually need to be placed in memory shared
+ * between processes or threads, most frequently by embedding them in
+ * structs. Be careful to align atomic variables to their own size!
+ *
+ * Before variables of one of these types can be used they need to be
+ * initialized using the type's initialization function (pg_atomic_init_flag,
+ * pg_atomic_init_u32 and pg_atomic_init_u64) or, in the case of global
+ * pg_atomic_flag variables, using the PG_ATOMIC_INIT_FLAG macro. These
+ * initializations have to be performed before any (possibly concurrent)
+ * access is possible, otherwise the results are undefined.
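+ *
+ * For example (an illustrative sketch; the struct and member names are
+ * made up):
+ *
+ *	typedef struct SharedCounter
+ *	{
+ *		pg_atomic_uint32 nrequests;
+ *	} SharedCounter;
+ *
+ * which then, before any concurrent access, is initialized with
+ * `pg_atomic_init_u32(&counter->nrequests, 0)`.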
+ *
+ * For several mathematical operations two variants exist: One that returns
+ * the old value before the operation was performed, and one that returns
+ * the new value. *_fetch_<op> variants will return the old value,
+ * *_<op>_fetch the new one.
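+ *
+ * For example, if `var` currently contains 10 (illustrative only):
+ * `pg_atomic_fetch_add_u32(&var, 1)` returns 10 and leaves 11 behind,
+ * `pg_atomic_add_fetch_u32(&var, 1)` afterwards returns 12.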
+ *
+ *
+ * To bring up postgres on a new platform/compiler, at the very least
+ * one of
+ * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag()
+ * * pg_atomic_compare_exchange_u32()
+ * * pg_atomic_exchange_u32()
+ * and all of
+ * * pg_compiler_barrier(), pg_memory_barrier()
+ * need to be implemented. There exist generic, hardware independent,
+ * implementations for several compilers which might be sufficient, although
+ * possibly not optimal, for a new platform.
+ *
+ * To add support for a new architecture, review whether the compilers used
+ * on that platform already provide adequate support via the files included
+ * in the "Compiler specific" section below. If not, either add an include
+ * for a new file adding generic support for your compiler there, and/or
+ * add/update an architecture specific include in the "Architecture specific"
+ * section below.
+ *
+ * Operations that aren't implemented by either architecture or compiler
+ * specific code are implemented on top of compare_exchange() loops or
+ * spinlocks.
+ *
+ * Use higher level functionality (lwlocks, spinlocks, heavyweight locks)
+ * whenever possible. Writing correct code using these facilities is hard.
+ *
+ * For an introduction to using memory barriers within the PostgreSQL backend,
+ * see src/backend/storage/lmgr/README.barrier
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/atomics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ATOMICS_H
+#define ATOMICS_H
+
+#define INSIDE_ATOMICS_H
+
+/*
+ * Architecture specific implementations/considerations.
+ */
+#if defined(__arm__) || defined(__arm) || \
+	defined(__aarch64__) || defined(__aarch64)
+#	include "storage/atomics-arch-arm.h"
+#elif defined(__i386__) || defined(__i386)
+#	include "storage/atomics-arch-i386.h"
+#elif defined(__x86_64__)
+#	include "storage/atomics-arch-amd64.h"
+#elif defined(__ia64__) || defined(__ia64)
+#	include "storage/atomics-arch-amd64.h"
+#elif defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__)
+#	include "storage/atomics-arch-ppc.h"
+#elif defined(__alpha) || defined(__alpha__)
+#	include "storage/atomics-arch-alpha.h"
+#elif defined(__hppa) || defined(__hppa__)
+#	include "storage/atomics-arch-hppa.h"
+#endif
+
+/*
+ * Compiler specific, but architecture independent implementations
+ */
+
+/* gcc or compatible, including icc */
+#if defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)
+#	include "storage/atomics-generic-gcc.h"
+/* MS Visual Studio */
+#elif defined(WIN32_ONLY_COMPILER)
+#	include "storage/atomics-generic-msvc.h"
+/* presumably only ac++ */
+#elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
+#	include "storage/atomics-generic-acc.h"
+/* sun studio */
+#elif defined(__SUNPRO_C) && !defined(__GNUC__)
+#	include "storage/atomics-generic-sunpro.h"
+/* IBM's xlc */
+#elif (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__)
+#	include "storage/atomics-generic-xlc.h"
+#else
+#	error "unsupported compiler"
+#endif
+
+/* if we have spinlocks, but not atomic ops, emulate them */
+#if defined(PG_HAS_ATOMIC_TEST_SET_FLAG) && \
+	!defined(PG_HAS_ATOMIC_EXCHANGE_U32)
+#	define PG_USE_ATOMICS_EMULATION
+#	include "storage/atomics-generic-spinlock.h"
+#endif
+
+
+/*
+ * Provide fallback implementations, where possible, for operations that
+ * aren't provided by the architecture/compiler specific code above.
+ */
+#include "storage/atomics-generic.h"
+
+/*
+ * pg_compiler_barrier - prevent the compiler from moving code
+ *
+ * A compiler barrier need not (and preferably should not) emit any actual
+ * machine code, but must act as an optimization fence: the compiler must not
+ * reorder loads or stores to main memory around the barrier.  However, the
+ * CPU may still reorder loads or stores at runtime, if the architecture's
+ * memory model permits this.
+ */
+#define pg_compiler_barrier()	pg_compiler_barrier_impl()
+
+/*
+ * pg_memory_barrier - prevent the CPU from reordering memory access
+ *
+ * A memory barrier must act as a compiler barrier, and in addition must
+ * guarantee that all loads and stores issued prior to the barrier are
+ * completed before any loads or stores issued after the barrier.  Unless
+ * loads and stores are totally ordered (which is not the case on most
+ * architectures) this requires issuing some sort of memory fencing
+ * instruction.
+ */
+#define pg_memory_barrier()	pg_memory_barrier_impl()
+
+/*
+ * pg_(read|write)_barrier - prevent the CPU from reordering memory access
+ *
+ * A read barrier must act as a compiler barrier, and in addition must
+ * guarantee that any loads issued prior to the barrier are completed before
+ * any loads issued after the barrier.	Similarly, a write barrier acts
+ * as a compiler barrier, and also orders stores.  Read and write barriers
+ * are thus weaker than a full memory barrier, but stronger than a compiler
+ * barrier.  In practice, on machines with strong memory ordering, read and
+ * write barriers may require nothing more than a compiler barrier.
+ */
+#define pg_read_barrier()	pg_read_barrier_impl()
+#define pg_write_barrier()	pg_write_barrier_impl()
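+
+/*
+ * A typical use of the weaker barriers (an illustrative sketch, not an API
+ * requirement): a writer fills in data and only then sets a 'ready' flag;
+ * readers test the flag before accessing the data:
+ *
+ *	writer:  shared->data = value; pg_write_barrier(); shared->ready = true;
+ *	reader:  if (shared->ready) { pg_read_barrier(); use(shared->data); }
+ */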
+
+/*
+ * pg_spin_delay - delay execution inside a spinlock's busy-wait loop
+ */
+#define pg_spin_delay()	pg_spin_delay_impl()
+
+
+/*
+ * pg_atomic_init_flag - initialize an atomic flag to the cleared state
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_test_set_flag - TAS()
+ *
+ * Full barrier semantics. (XXX? Only acquire?)
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_unlocked_test_flag - test whether the flag is set
+ *
+ * Returns true if the flag is set. No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_clear_flag - release lock set by TAS()
+ *
+ * Full barrier semantics. (XXX? Only release?)
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_clear_flag(volatile pg_atomic_flag *ptr);
+
+/*
+ * pg_atomic_init_u32 - initialize atomic variable
+ *
+ * Has to be done before any usage, except when the atomic variable is
+ * declared statically, in which case the variable is initialized to 0.
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val);
+
+/*
+ * pg_atomic_read_u32 - read from atomic variable
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr);
+
+/*
+ * pg_atomic_write_u32 - write to atomic variable
+ *
+ * No barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val);
+
+/*
+ * pg_atomic_exchange_u32 - exchange newval with current value
+ *
+ * Returns the old value of 'ptr' before the swap.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval);
+
+/*
+ * pg_atomic_compare_exchange_u32 - CAS operation
+ *
+ * Atomically compare the current value of ptr with *expected and store newval
+ * iff ptr and *expected have the same value. The current value of *ptr will
+ * always be stored in *expected.
+ *
+ * Return whether the values have been exchanged.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
+							   uint32 *expected, uint32 newval);
+
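+/*
+ * A typical retry loop built on this primitive, here atomically setting a
+ * bit (an illustrative sketch only):
+ *
+ *	uint32 old = pg_atomic_read_u32(&var);
+ *	while (!pg_atomic_compare_exchange_u32(&var, &old, old | 1))
+ *		;	on failure 'old' already contains the freshly read value
+ */
+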
+/*
+ * pg_atomic_fetch_add_u32 - atomically add to variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, uint32 add_);
+
+/*
+ * pg_atomic_fetch_sub_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_);
+
+/*
+ * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_);
+
+/*
+ * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_);
+
+/*
+ * pg_atomic_add_fetch_u32 - atomically add to variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 add_);
+
+/*
+ * pg_atomic_sub_fetch_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_);
+
+/*
+ * pg_atomic_fetch_add_until_u32 - saturated addition to variable
+ *
+ * Adds add_ to the variable, but never beyond the until value. Returns the
+ * value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+STATIC_IF_INLINE_DECLARE uint32
+pg_atomic_fetch_add_until_u32(volatile pg_atomic_uint32 *ptr, uint32 add_,
+							  uint32 until);
+
+/* ----
+ * The 64 bit operations have the same semantics as their 32bit counterparts
+ * if they are available. Test for their existence using the
+ * PG_HAVE_64_BIT_ATOMICS define.
+ * ----
+ */
+#ifdef PG_HAVE_64_BIT_ATOMICS
+
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr);
+
+STATIC_IF_INLINE_DECLARE void
+pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval);
+
+STATIC_IF_INLINE_DECLARE bool
+pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
+							   uint64 *expected, uint64 newval);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, uint64 add_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 add_);
+
+STATIC_IF_INLINE_DECLARE uint64
+pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_);
+
+#endif /* PG_HAVE_64_BIT_ATOMICS */
+
+/*
+ * The following functions are wrappers around the platform specific
+ * implementations of the atomic operations, performing common checks.
+ */
+#if defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS)
+
+/* Assert that a pointer is appropriately aligned for an atomic operation. */
+#define CHECK_POINTER_ALIGNMENT(ptr, bndr) \
+	Assert(TYPEALIGN(bndr, (uintptr_t)(ptr)) == (uintptr_t)(ptr))
+
+
+STATIC_IF_INLINE void
+pg_atomic_init_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	return pg_atomic_test_set_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	return pg_atomic_unlocked_test_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_clear_flag(volatile pg_atomic_flag *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, sizeof(*ptr));
+
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	pg_atomic_init_u32_impl(ptr, val);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	pg_atomic_write_u32_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_read_u32_impl(ptr);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+
+	return pg_atomic_exchange_u32_impl(ptr, newval);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
+							   uint32 *expected, uint32 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	CHECK_POINTER_ALIGNMENT(expected, 4);
+
+	return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_add_u32_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_and_u32_impl(ptr, and_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_or_u32_impl(ptr, or_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_add_fetch_u32_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, uint32 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_sub_fetch_u32_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint32
+pg_atomic_fetch_add_until_u32(volatile pg_atomic_uint32 *ptr, uint32 add_,
+							  uint32 until)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 4);
+	return pg_atomic_fetch_add_until_u32_impl(ptr, add_, until);
+}
+
+#ifdef PG_HAVE_64_BIT_ATOMICS
+
+STATIC_IF_INLINE void
+pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+
+	pg_atomic_init_u64_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_read_u64_impl(ptr);
+}
+
+STATIC_IF_INLINE void
+pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	pg_atomic_write_u64_impl(ptr, val);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+
+	return pg_atomic_exchange_u64_impl(ptr, newval);
+}
+
+STATIC_IF_INLINE bool
+pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
+							   uint64 *expected, uint64 newval)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	CHECK_POINTER_ALIGNMENT(expected, 8);
+	return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_add_u64_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_and_u64_impl(ptr, and_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_fetch_or_u64_impl(ptr, or_);
+}
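+
+/*
+ * Wrappers for the 64 bit add_fetch/sub_fetch variants declared above; the
+ * corresponding _impl functions fall back to atomics-generic.h if the
+ * platform doesn't provide them natively.
+ */
+STATIC_IF_INLINE uint64
+pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 add_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_add_fetch_u64_impl(ptr, add_);
+}
+
+STATIC_IF_INLINE uint64
+pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, uint64 sub_)
+{
+	CHECK_POINTER_ALIGNMENT(ptr, 8);
+	return pg_atomic_sub_fetch_u64_impl(ptr, sub_);
+}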
+#endif /* PG_HAVE_64_BIT_ATOMICS */
+
+#endif /* defined(PG_USE_INLINE) || defined(ATOMICS_INCLUDE_DEFINITIONS) */
+
+#undef INSIDE_ATOMICS_H
+
+#endif /* ATOMICS_H */
diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h
index 16a9a0c..607a3c9 100644
--- a/src/include/storage/barrier.h
+++ b/src/include/storage/barrier.h
@@ -13,137 +13,12 @@
 #ifndef BARRIER_H
 #define BARRIER_H
 
-#include "storage/s_lock.h"
-
-extern slock_t dummy_spinlock;
-
-/*
- * A compiler barrier need not (and preferably should not) emit any actual
- * machine code, but must act as an optimization fence: the compiler must not
- * reorder loads or stores to main memory around the barrier.  However, the
- * CPU may still reorder loads or stores at runtime, if the architecture's
- * memory model permits this.
- *
- * A memory barrier must act as a compiler barrier, and in addition must
- * guarantee that all loads and stores issued prior to the barrier are
- * completed before any loads or stores issued after the barrier.  Unless
- * loads and stores are totally ordered (which is not the case on most
- * architectures) this requires issuing some sort of memory fencing
- * instruction.
- *
- * A read barrier must act as a compiler barrier, and in addition must
- * guarantee that any loads issued prior to the barrier are completed before
- * any loads issued after the barrier.	Similarly, a write barrier acts
- * as a compiler barrier, and also orders stores.  Read and write barriers
- * are thus weaker than a full memory barrier, but stronger than a compiler
- * barrier.  In practice, on machines with strong memory ordering, read and
- * write barriers may require nothing more than a compiler barrier.
- *
- * For an introduction to using memory barriers within the PostgreSQL backend,
- * see src/backend/storage/lmgr/README.barrier
- */
-
-#if defined(DISABLE_BARRIERS)
-
-/*
- * Fall through to the spinlock-based implementation.
- */
-#elif defined(__INTEL_COMPILER)
-
-/*
- * icc defines __GNUC__, but doesn't support gcc's inline asm syntax
- */
-#if defined(__ia64__) || defined(__ia64)
-#define pg_memory_barrier()		__mf()
-#elif defined(__i386__) || defined(__x86_64__)
-#define pg_memory_barrier()		_mm_mfence()
-#endif
-
-#define pg_compiler_barrier()	__memory_barrier()
-#elif defined(__GNUC__)
-
-/* This works on any architecture, since it's only talking to GCC itself. */
-#define pg_compiler_barrier()	__asm__ __volatile__("" : : : "memory")
-
-#if defined(__i386__)
-
 /*
- * i386 does not allow loads to be reordered with other loads, or stores to be
- * reordered with other stores, but a load can be performed before a subsequent
- * store.
- *
- * "lock; addl" has worked for longer than "mfence".
+ * This used to be a separate file, full of compiler/architecture
+ * dependent defines, but it's now implemented by the atomics.h
+ * infrastructure and just kept for backward compatibility.
  */
-#define pg_memory_barrier()		\
-	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
-#define pg_read_barrier()		pg_compiler_barrier()
-#define pg_write_barrier()		pg_compiler_barrier()
-#elif defined(__x86_64__)		/* 64 bit x86 */
-
-/*
- * x86_64 has similar ordering characteristics to i386.
- *
- * Technically, some x86-ish chips support uncached memory access and/or
- * special instructions that are weakly ordered.  In those cases we'd need
- * the read and write barriers to be lfence and sfence.  But since we don't
- * do those things, a compiler barrier should be enough.
- */
-#define pg_memory_barrier()		\
-	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory")
-#define pg_read_barrier()		pg_compiler_barrier()
-#define pg_write_barrier()		pg_compiler_barrier()
-#elif defined(__ia64__) || defined(__ia64)
-
-/*
- * Itanium is weakly ordered, so read and write barriers require a full
- * fence.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("mf" : : : "memory")
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-
-/*
- * lwsync orders loads with respect to each other, and similarly with stores.
- * But a load can be performed before a subsequent store, so sync must be used
- * for a full memory barrier.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("sync" : : : "memory")
-#define pg_read_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
-#define pg_write_barrier()		__asm__ __volatile__ ("lwsync" : : : "memory")
-#elif defined(__alpha) || defined(__alpha__)	/* Alpha */
-
-/*
- * Unlike all other known architectures, Alpha allows dependent reads to be
- * reordered, but we don't currently find it necessary to provide a conditional
- * read barrier to cover that case.  We might need to add that later.
- */
-#define pg_memory_barrier()		__asm__ __volatile__ ("mb" : : : "memory")
-#define pg_read_barrier()		__asm__ __volatile__ ("rmb" : : : "memory")
-#define pg_write_barrier()		__asm__ __volatile__ ("wmb" : : : "memory")
-#elif defined(__hppa) || defined(__hppa__)		/* HP PA-RISC */
-
-/* HPPA doesn't do either read or write reordering */
-#define pg_memory_barrier()		pg_compiler_barrier()
-#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
-
-/*
- * If we're on GCC 4.1.0 or higher, we should be able to get a memory
- * barrier out of this compiler built-in.  But we prefer to rely on our
- * own definitions where possible, and use this only as a fallback.
- */
-#define pg_memory_barrier()		__sync_synchronize()
-#endif
-#elif defined(__ia64__) || defined(__ia64)
-
-#define pg_compiler_barrier()	_Asm_sched_fence()
-#define pg_memory_barrier()		_Asm_mf()
-#elif defined(WIN32_ONLY_COMPILER)
-
-/* Should work on both MSVC and Borland. */
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-#define pg_compiler_barrier()	_ReadWriteBarrier()
-#define pg_memory_barrier()		MemoryBarrier()
-#endif
+#include "storage/atomics.h"
 
 /*
  * If we have no memory barrier implementation for this architecture, we
@@ -157,6 +32,10 @@ extern slock_t dummy_spinlock;
  * fence.  But all of our actual implementations seem OK in this regard.
  */
 #if !defined(pg_memory_barrier)
+#include "storage/s_lock.h"
+
+extern slock_t dummy_spinlock;
+
 #define pg_memory_barrier() \
 	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
 #endif
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7dcd5d9..73aebfa 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -21,10 +21,6 @@
  *	void S_UNLOCK(slock_t *lock)
  *		Unlock a previously acquired lock.
  *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns TRUE if free, FALSE if locked.
- *		This does *not* change the state of the lock.
- *
  *	void SPIN_DELAY(void)
  *		Delay operation to occur inside spinlock wait loop.
  *
@@ -94,879 +90,17 @@
 #ifndef S_LOCK_H
 #define S_LOCK_H
 
+#include "storage/atomics.h"
 #include "storage/pg_sema.h"
 
-#ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */
-
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-#if defined(__ia64__) || defined(__ia64)
-/*
- * Intel Itanium, gcc or Intel's compiler.
- *
- * Itanium has weak memory ordering, but we rely on the compiler to enforce
- * strict ordering of accesses to volatile data.  In particular, while the
- * xchg instruction implicitly acts as a memory barrier with 'acquire'
- * semantics, we do not have an explicit memory fence instruction in the
- * S_UNLOCK macro.  We use a regular assignment to clear the spinlock, and
- * trust that the compiler marks the generated store instruction with the
- * ".rel" opcode.
- *
- * Testing shows that assumption to hold on gcc, although I could not find
- * any explicit statement on that in the gcc manual.  In Intel's compiler,
- * the -m[no-]serialize-volatile option controls that, and testing shows that
- * it is enabled by default.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On IA64, it's a win to use a non-locking test before the xchg proper */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-#ifndef __INTEL_COMPILER
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	long int	ret;
-
-	__asm__ __volatile__(
-		"	xchg4 	%0=%1,%2	\n"
-:		"=r"(ret), "+m"(*lock)
-:		"r"(1)
-:		"memory");
-	return (int) ret;
-}
-
-#else /* __INTEL_COMPILER */
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int		ret;
-
-	ret = _InterlockedExchange(lock,1);	/* this is a xchg asm macro */
-
-	return ret;
-}
-
-#endif /* __INTEL_COMPILER */
-#endif	 /* __ia64__ || __ia64 */
-
-
-/*
- * On ARM, we use __sync_lock_test_and_set(int *, int) if available, and if
- * not fall back on the SWPB instruction.  SWPB does not work on ARMv6 or
- * later, so the compiler builtin is preferred if available.  Note also that
- * the int-width variant of the builtin works on more chips than other widths.
- */
-#if defined(__arm__) || defined(__arm)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-#ifdef HAVE_GCC_INT_ATOMICS
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#else /* !HAVE_GCC_INT_ATOMICS */
-
-typedef unsigned char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res = 1;
-
-	__asm__ __volatile__(
-		"	swpb 	%0, %0, [%2]	\n"
-:		"+r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return (int) _res;
-}
-
-#endif	 /* HAVE_GCC_INT_ATOMICS */
-#endif	 /* __arm__ */
-
-
-/*
- * On ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- */
-#if defined(__aarch64__) || defined(__aarch64)
-#ifdef HAVE_GCC_INT_ATOMICS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC_INT_ATOMICS */
-#endif	 /* __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res;
-
-	/*
-	 *	See comment in /pg/backend/port/tas/solaris_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return (int) _res;
-}
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * On newer machines, we can use lwsync instead for better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-#ifdef USE_PPC_LWARX_MUTEX_HINT
-"	lwarx   %0,0,%3,1	\n"
-#else
-"	lwarx   %0,0,%3		\n"
-#endif
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f         	\n"
-"1:	li      %1,1		\n"
-"	b		3f			\n"
-"2:						\n"
-#ifdef USE_PPC_LWSYNC
-"	lwsync				\n"
-#else
-"	isync				\n"
-#endif
-"	li      %1,0		\n"
-"3:						\n"
-
-:	"=&r"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * On newer machines, we can use lwsync instead for better performance.
- */
-#ifdef USE_PPC_LWSYNC
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	sync \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif /* USE_PPC_LWSYNC */
-
-#endif /* powerpc */
-
-
-/* Linux Motorola 68k */
-#if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int rv;
-
-	__asm__	__volatile__(
-		"	clrl	%0		\n"
-		"	tas		%1		\n"
-		"	sne		%0		\n"
-:		"=d"(rv), "+m"(*lock)
-:
-:		"memory", "cc");
-	return rv;
-}
-
-#endif	 /* (__mc68000__ || __m68k__) && __linux__ */
-
-
-/*
- * VAXen -- even multiprocessor ones
- * (thanks to Tom Ivar Helbekkmo)
- */
-#if defined(__vax__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int	_res;
-
-	__asm__ __volatile__(
-		"	movl 	$1, %0			\n"
-		"	bbssi	$0, (%2), 1f	\n"
-		"	clrl	%0				\n"
-		"1: \n"
-:		"=&r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-	return _res;
-}
-
-#endif	 /* __vax__ */
-
-#if defined(__alpha) || defined(__alpha__)	/* Alpha */
-/*
- * Correct multi-processor locking methods are explained in section 5.5.3
- * of the Alpha AXP Architecture Handbook, which at this writing can be
- * found at ftp://ftp.netbsd.org/pub/NetBSD/misc/dec-docs/index.html.
- * For gcc we implement the handbook's code directly with inline assembler.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned long slock_t;
-
-#define TAS(lock)  tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register slock_t _res;
-
-	__asm__	__volatile__(
-		"	ldq		$0, %1	\n"
-		"	bne		$0, 2f	\n"
-		"	ldq_l	%0, %1	\n"
-		"	bne		%0, 2f	\n"
-		"	mov		1,  $0	\n"
-		"	stq_c	$0, %1	\n"
-		"	beq		$0, 2f	\n"
-		"	mb				\n"
-		"	br		3f		\n"
-		"2:	mov		1, %0	\n"
-		"3:					\n"
-:		"=&r"(_res), "+m"(*lock)
-:
-:		"memory", "0");
-	return (int) _res;
-}
-
-#define S_UNLOCK(lock)	\
-do \
-{\
-	__asm__ __volatile__ ("	mb \n"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __alpha || __alpha__ */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-/* Note: on SGI we use the OS' mutex ABI, see below */
-/* Note: R10000 processors require a separate SYNC */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register volatile slock_t *_l = lock;
-	register int _res;
-	register int _tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		"       .set mips2          \n"
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		"       .set mips2          \n" \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              "); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-#if defined(__m32r__) && defined(HAVE_SYS_TAS_H)	/* Renesas' M32R */
-#define HAS_TEST_AND_SET
-
-#include <sys/tas.h>
-
-typedef int slock_t;
-
-#define TAS(lock) tas(lock)
-
-#endif /* __m32r__ */
-
-
-#if defined(__sh__)				/* Renesas' SuperH */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	register int _res;
-
-	/*
-	 * This asm is coded as if %0 could be any register, but actually SuperH
-	 * restricts the target of xor-immediate to be R0.  That's handled by
-	 * the "z" constraint on _res.
-	 */
-	__asm__ __volatile__(
-		"	tas.b @%2    \n"
-		"	movt  %0     \n"
-		"	xor   #1,%0  \n"
-:		"=z"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory", "t");
-	return _res;
-}
-
-#endif	 /* __sh__ */
-
-
-/* These live in s_lock.c, but only for gcc */
-
-
-#if defined(__m68k__) && !defined(__linux__)	/* non-Linux Motorola 68k */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-#endif
-
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-
-#if defined(USE_UNIVEL_CC)		/* Unixware compiler */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock)	tas(lock)
-
-asm int
-tas(volatile slock_t *s_lock)
-{
-/* UNIVEL wants %mem in column 1, so we don't pg_indent this file */
-%mem s_lock
-	pushl %ebx
-	movl s_lock, %ebx
-	movl $255, %eax
-	lock
-	xchgb %al, (%ebx)
-	popl %ebx
-}
-
-#endif	 /* defined(USE_UNIVEL_CC) */
-
-
-#if defined(__alpha) || defined(__alpha__)	/* Tru64 Unix Alpha compiler */
-/*
- * The Tru64 compiler doesn't support gcc-style inline asm, but it does
- * have some builtin functions that accomplish much the same results.
- * For simplicity, slock_t is defined as long (ie, quadword) on Alpha
- * regardless of the compiler in use.  LOCK_LONG and UNLOCK_LONG only
- * operate on an int (ie, longword), but that's OK as long as we define
- * S_INIT_LOCK to zero out the whole quadword.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned long slock_t;
-
-#include <alpha/builtins.h>
-#define S_INIT_LOCK(lock)  (*(lock) = 0)
-#define TAS(lock)		   (__LOCK_LONG_RETRY((lock), 1) == 0)
-#define S_UNLOCK(lock)	   __UNLOCK_LONG(lock)
-
-#endif	 /* __alpha || __alpha__ */
-
-
-#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
-/*
- * HP's PA-RISC
- *
- * See src/backend/port/hpux/tas.c.template for details about LDCWX.  Because
- * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte
- * struct.  The active word in the struct is whichever has the aligned address;
- * the other three words just sit at -1.
- *
- * When using gcc, we can inline the required assembly code.
- */
-#define HAS_TEST_AND_SET
-
-typedef struct
-{
-	int			sema[4];
-} slock_t;
-
-#define TAS_ACTIVE_WORD(lock)	((volatile int *) (((uintptr_t) (lock) + 15) & ~15))
-
-#if defined(__GNUC__)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile int *lockword = TAS_ACTIVE_WORD(lock);
-	register int lockval;
-
-	__asm__ __volatile__(
-		"	ldcwx	0(0,%2),%0	\n"
-:		"=r"(lockval), "+m"(*lockword)
-:		"r"(lockword)
-:		"memory");
-	return (lockval == 0);
-}
-
-#endif /* __GNUC__ */
-
-#define S_UNLOCK(lock)	(*TAS_ACTIVE_WORD(lock) = -1)
-
-#define S_INIT_LOCK(lock) \
-	do { \
-		volatile slock_t *lock_ = (lock); \
-		lock_->sema[0] = -1; \
-		lock_->sema[1] = -1; \
-		lock_->sema[2] = -1; \
-		lock_->sema[3] = -1; \
-	} while (0)
-
-#define S_LOCK_FREE(lock)	(*TAS_ACTIVE_WORD(lock) != 0)
-
-#endif	 /* __hppa || __hppa__ */
-
-
-#if defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
-/*
- * HP-UX on Itanium, non-gcc compiler
- *
- * We assume that the compiler enforces strict ordering of loads/stores on
- * volatile data (see comments on the gcc-version earlier in this file).
- * Note that this assumption does *not* hold if you use the
- * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that.
- *
- * See also Implementing Spinlocks on the Intel Itanium Architecture and
- * PA-RISC, by Tor Ekqvist and David Graves, for more information.  As of
- * this writing, version 1.0 of the manual is available at:
- * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#include <ia64/sys/inline.h>
-#define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
-/* On IA64, it's a win to use a non-locking test before the xchg proper */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-#endif	/* HPUX on IA64, non gcc */
-
-#if defined(_AIX)	/* AIX */
-/*
- * AIX (POWER)
- */
-#define HAS_TEST_AND_SET
-
-#include <sys/atomic_op.h>
-
-typedef int slock_t;
-
-#define TAS(lock)			_check_lock((slock_t *) (lock), 0, 1)
-#define S_UNLOCK(lock)		_clear_lock((slock_t *) (lock), 0)
-#endif	 /* _AIX */
-
-
-/* These are in s_lock.c */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef WIN32_ONLY_COMPILER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause instrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@postgresql.org.
-#endif
-
-
-#else	/* !HAVE_SPINLOCKS */
-
-
-/*
- * Fake spinlock implementation using semaphores --- slow and prone
- * to fall foul of kernel limits on number of semaphores, so don't use this
- * unless you must!  The subroutines appear in spin.c.
- */
-typedef PGSemaphoreData slock_t;
-
-extern bool s_lock_free_sema(volatile slock_t *lock);
-extern void s_unlock_sema(volatile slock_t *lock);
-extern void s_init_lock_sema(volatile slock_t *lock);
-extern int	tas_sema(volatile slock_t *lock);
-
-#define S_LOCK_FREE(lock)	s_lock_free_sema(lock)
-#define S_UNLOCK(lock)	 s_unlock_sema(lock)
-#define S_INIT_LOCK(lock)	s_init_lock_sema(lock)
-#define TAS(lock)	tas_sema(lock)
-
-
-#endif	/* HAVE_SPINLOCKS */
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
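+/*
+ * Map the old spinlock API onto the atomic flag operations.  Note the
+ * inversion: TAS() returns 0 on success, whereas pg_atomic_test_set_flag()
+ * returns true if the flag could be set.
+ */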
+typedef pg_atomic_flag slock_t;
+#define TAS(lock)			(!pg_atomic_test_set_flag(lock))
+#define TAS_SPIN(lock)		(pg_atomic_unlocked_test_flag(lock) ? 1 : TAS(lock))
+#define S_UNLOCK(lock)		pg_atomic_clear_flag(lock)
+#define S_INIT_LOCK(lock)	pg_atomic_init_flag(lock)
+#define SPIN_DELAY()		pg_spin_delay()
 #define S_LOCK(lock) \
 	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)		(*((volatile slock_t *) (lock)) = 0)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
 
 /*
  * Platform-independent out-of-line support routines
diff --git a/src/test/regress/expected/lock.out b/src/test/regress/expected/lock.out
index 0d7c3ba..fd27344 100644
--- a/src/test/regress/expected/lock.out
+++ b/src/test/regress/expected/lock.out
@@ -60,3 +60,11 @@ DROP TABLE lock_tbl2;
 DROP TABLE lock_tbl1;
 DROP SCHEMA lock_schema1 CASCADE;
 DROP ROLE regress_rol_lock1;
+-- atomic ops tests
+RESET search_path;
+SELECT test_atomic_ops();
+ test_atomic_ops 
+-----------------
+ t
+(1 row)
+
diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source
index aef1518..1fded84 100644
--- a/src/test/regress/input/create_function_1.source
+++ b/src/test/regress/input/create_function_1.source
@@ -57,6 +57,11 @@ CREATE FUNCTION make_tuple_indirect (record)
         AS '@libdir@/regress@DLSUFFIX@'
         LANGUAGE C STRICT;
 
+CREATE FUNCTION test_atomic_ops()
+    RETURNS bool
+    AS '@libdir@/regress@DLSUFFIX@'
+    LANGUAGE C;
+
 -- Things that shouldn't work:
 
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source
index 9761d12..9926c90 100644
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -51,6 +51,10 @@ CREATE FUNCTION make_tuple_indirect (record)
         RETURNS record
         AS '@libdir@/regress@DLSUFFIX@'
         LANGUAGE C STRICT;
+CREATE FUNCTION test_atomic_ops()
+    RETURNS bool
+    AS '@libdir@/regress@DLSUFFIX@'
+    LANGUAGE C;
 -- Things that shouldn't work:
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
     AS 'SELECT ''not an integer'';';
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 3bd8a15..1db996a 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -16,6 +16,7 @@
 #include "commands/trigger.h"
 #include "executor/executor.h"
 #include "executor/spi.h"
+#include "storage/atomics.h"
 #include "utils/builtins.h"
 #include "utils/geo_decls.h"
 #include "utils/rel.h"
@@ -40,6 +41,7 @@ extern int	oldstyle_length(int n, text *t);
 extern Datum int44in(PG_FUNCTION_ARGS);
 extern Datum int44out(PG_FUNCTION_ARGS);
 extern Datum make_tuple_indirect(PG_FUNCTION_ARGS);
+extern Datum test_atomic_ops(PG_FUNCTION_ARGS);
 
 #ifdef PG_MODULE_MAGIC
 PG_MODULE_MAGIC;
@@ -829,3 +831,214 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
 
 	PG_RETURN_HEAPTUPLEHEADER(newtup->t_data);
 }
+
+
+static pg_atomic_flag global_flag = PG_ATOMIC_INIT_FLAG();
+
+static void
+test_atomic_flag(void)
+{
+	pg_atomic_flag flag;
+
+	/* check the global flag */
+	if (pg_atomic_unlocked_test_flag(&global_flag))
+		elog(ERROR, "global_flag: unduly set");
+
+	if (!pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: couldn't set");
+
+	if (pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: set spuriously #1");
+
+	pg_atomic_clear_flag(&global_flag);
+
+	if (!pg_atomic_test_set_flag(&global_flag))
+		elog(ERROR, "global_flag: couldn't set");
+
+	/* check the local flag */
+	pg_atomic_init_flag(&flag);
+
+	if (pg_atomic_unlocked_test_flag(&flag))
+		elog(ERROR, "flag: unduly set");
+
+	if (!pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: couldn't set");
+
+	if (pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: set spuriously #2");
+
+	pg_atomic_clear_flag(&flag);
+
+	if (!pg_atomic_test_set_flag(&flag))
+		elog(ERROR, "flag: couldn't set");
+}
+
+#ifdef PG_HAS_ATOMIC_COMPARE_EXCHANGE_U32
+static void
+test_atomic_uint32(void)
+{
+	pg_atomic_uint32 var;
+	uint32 expected;
+	int i;
+
+	pg_atomic_init_u32(&var, 0);
+
+	if (pg_atomic_read_u32(&var) != 0)
+		elog(ERROR, "atomic_read_u32() #1 wrong");
+
+	pg_atomic_write_u32(&var, 3);
+
+	if (pg_atomic_read_u32(&var) != 3)
+		elog(ERROR, "atomic_read_u32() #2 wrong");
+
+	if (pg_atomic_fetch_add_u32(&var, 1) != 3)
+		elog(ERROR, "atomic_fetch_add_u32() #1 wrong");
+
+	if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
+		elog(ERROR, "atomic_fetch_sub_u32() #1 wrong");
+
+	if (pg_atomic_sub_fetch_u32(&var, 3) != 0)
+		elog(ERROR, "atomic_sub_fetch_u32() #1 wrong");
+
+	if (pg_atomic_add_fetch_u32(&var, 10) != 10)
+		elog(ERROR, "atomic_add_fetch_u32() #1 wrong");
+
+	if (pg_atomic_exchange_u32(&var, 5) != 10)
+		elog(ERROR, "pg_atomic_exchange_u32() #1 wrong");
+
+	if (pg_atomic_exchange_u32(&var, 0) != 5)
+		elog(ERROR, "pg_atomic_exchange_u32() #0 wrong");
+
+	/* fail exchange because of old expected */
+	expected = 10;
+	if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
+		elog(ERROR, "atomic_compare_exchange_u32() changed value spuriously");
+
+	/* CAS is allowed to fail due to interrupts, try a couple of times */
+	for (i = 0; i < 1000; i++)
+	{
+		expected = 0;
+		if (pg_atomic_compare_exchange_u32(&var, &expected, 1))
+			break;
+	}
+	if (i == 1000)
+		elog(ERROR, "atomic_compare_exchange_u32() never succeeded");
+	if (pg_atomic_read_u32(&var) != 1)
+		elog(ERROR, "atomic_compare_exchange_u32() didn't set value properly");
+
+	pg_atomic_write_u32(&var, 0);
+
+	/* try setting flagbits */
+	if (pg_atomic_fetch_or_u32(&var, 1) & 1)
+		elog(ERROR, "pg_atomic_fetch_or_u32() #1 wrong");
+
+	if (!(pg_atomic_fetch_or_u32(&var, 2) & 1))
+		elog(ERROR, "pg_atomic_fetch_or_u32() #2 wrong");
+
+	if (pg_atomic_read_u32(&var) != 3)
+		elog(ERROR, "invalid result after pg_atomic_fetch_or_u32()");
+
+	/* try clearing flagbits */
+	if ((pg_atomic_fetch_and_u32(&var, ~2) & 3) != 3)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #1 wrong");
+
+	if (pg_atomic_fetch_and_u32(&var, ~1) != 1)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #2 wrong: is %u",
+			 pg_atomic_read_u32(&var));
+	/* no bits set anymore */
+	if (pg_atomic_fetch_and_u32(&var, ~0) != 0)
+		elog(ERROR, "pg_atomic_fetch_and_u32() #3 wrong");
+}
+#endif /* PG_HAS_ATOMIC_COMPARE_EXCHANGE_U32 */
+
+#ifdef PG_HAS_ATOMIC_COMPARE_EXCHANGE_U64
+static void
+test_atomic_uint64(void)
+{
+	pg_atomic_uint64 var;
+	uint64 expected;
+	int i;
+
+	pg_atomic_init_u64(&var, 0);
+
+	if (pg_atomic_read_u64(&var) != 0)
+		elog(ERROR, "atomic_read_u64() #1 wrong");
+
+	pg_atomic_write_u64(&var, 3);
+
+	if (pg_atomic_read_u64(&var) != 3)
+		elog(ERROR, "atomic_read_u64() #2 wrong");
+
+	if (pg_atomic_fetch_add_u64(&var, 1) != 3)
+		elog(ERROR, "atomic_fetch_add_u64() #1 wrong");
+
+	if (pg_atomic_fetch_sub_u64(&var, 1) != 4)
+		elog(ERROR, "atomic_fetch_sub_u64() #1 wrong");
+
+	if (pg_atomic_sub_fetch_u64(&var, 3) != 0)
+		elog(ERROR, "atomic_sub_fetch_u64() #1 wrong");
+
+	if (pg_atomic_add_fetch_u64(&var, 10) != 10)
+		elog(ERROR, "atomic_add_fetch_u64() #1 wrong");
+
+	if (pg_atomic_exchange_u64(&var, 5) != 10)
+		elog(ERROR, "pg_atomic_exchange_u64() #1 wrong");
+
+	if (pg_atomic_exchange_u64(&var, 0) != 5)
+		elog(ERROR, "pg_atomic_exchange_u64() #0 wrong");
+
+	/* fail exchange because of old expected */
+	expected = 10;
+	if (pg_atomic_compare_exchange_u64(&var, &expected, 1))
+		elog(ERROR, "atomic_compare_exchange_u64() changed value spuriously");
+
+	/* CAS is allowed to fail due to interrupts, try a couple of times */
+	for (i = 0; i < 100; i++)
+	{
+		expected = 0;
+		if (pg_atomic_compare_exchange_u64(&var, &expected, 1))
+			break;
+	}
+	if (i == 100)
+		elog(ERROR, "atomic_compare_exchange_u64() never succeeded");
+	if (pg_atomic_read_u64(&var) != 1)
+		elog(ERROR, "atomic_compare_exchange_u64() didn't set value properly");
+
+	pg_atomic_write_u64(&var, 0);
+
+	/* try setting flagbits */
+	if (pg_atomic_fetch_or_u64(&var, 1) & 1)
+		elog(ERROR, "pg_atomic_fetch_or_u64() #1 wrong");
+
+	if (!(pg_atomic_fetch_or_u64(&var, 2) & 1))
+		elog(ERROR, "pg_atomic_fetch_or_u64() #2 wrong");
+
+	if (pg_atomic_read_u64(&var) != 3)
+		elog(ERROR, "invalid result after pg_atomic_fetch_or_u64()");
+
+	/* try clearing flagbits */
+	if ((pg_atomic_fetch_and_u64(&var, ~2) & 3) != 3)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #1 wrong");
+
+	if (pg_atomic_fetch_and_u64(&var, ~1) != 1)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #2 wrong: is %u",
+			 pg_atomic_read_u64(&var));
+	/* no bits set anymore */
+	if (pg_atomic_fetch_and_u64(&var, ~0) != 0)
+		elog(ERROR, "pg_atomic_fetch_and_u64() #3 wrong");
+}
+#endif /* PG_HAS_ATOMIC_COMPARE_EXCHANGE_U64 */
+
+
+Datum
+test_atomic_ops(PG_FUNCTION_ARGS)
+{
+	test_atomic_flag();
+#ifdef PG_HAS_ATOMIC_COMPARE_EXCHANGE_U32
+	test_atomic_uint32();
+#endif
+#ifdef PG_HAS_ATOMIC_COMPARE_EXCHANGE_U64
+	test_atomic_uint64();
+#endif
+
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/test/regress/sql/lock.sql b/src/test/regress/sql/lock.sql
index dda212f..567e8bc 100644
--- a/src/test/regress/sql/lock.sql
+++ b/src/test/regress/sql/lock.sql
@@ -64,3 +64,8 @@ DROP TABLE lock_tbl2;
 DROP TABLE lock_tbl1;
 DROP SCHEMA lock_schema1 CASCADE;
 DROP ROLE regress_rol_lock1;
+
+
+-- atomic ops tests
+RESET search_path;
+SELECT test_atomic_ops();
-- 
1.8.3.251.g1462b67

