From 958b04eb08603167dee2fe6684f9430f5b578f28 Mon Sep 17 00:00:00 2001
From: Mikko Tiihonen <mikko.tiihonen@nitor.fi>
Date: Wed, 12 Dec 2012 20:02:49 +0200
Subject: [PATCH] Use gcc built-in atomic add/sub instructions, if available


diff --git a/configure.in b/configure.in
index 2dee4b3..dec2785 100644
--- a/configure.in
+++ b/configure.in
@@ -1451,6 +1451,28 @@ if test x"$pgac_cv_gcc_int_atomics" = x"yes"; then
   AC_DEFINE(HAVE_GCC_INT_ATOMICS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
 fi
 
+# TODO: also check for __atomic_is_lock_free(sizeof(int), 0)
+
+AC_CACHE_CHECK([for builtin atomic functions], pgac_cv_gcc_int_atomic_add,
+[AC_TRY_LINK([],
+  [int counter = 0;
+   __atomic_add_fetch(&counter, 1, __ATOMIC_SEQ_CST);],
+  [pgac_cv_gcc_int_atomic_add="yes"],
+  [pgac_cv_gcc_int_atomic_add="no"])])
+if test x"$pgac_cv_gcc_int_atomic_add" = x"yes"; then
+  AC_DEFINE(HAVE_GCC_INT_ATOMIC_ADD, 1, [Define to 1 if you have __atomic_add_fetch(int *, int, int) and friends.])
+fi
+
+AC_CACHE_CHECK([for builtin sync functions], pgac_cv_gcc_int_sync_add,
+[AC_TRY_LINK([],
+  [int counter = 0;
+   __sync_add_and_fetch(&counter, 1);],
+  [pgac_cv_gcc_int_sync_add="yes"],
+  [pgac_cv_gcc_int_sync_add="no"])])
+if test x"$pgac_cv_gcc_int_sync_add" = x"yes"; then
+  AC_DEFINE(HAVE_GCC_INT_SYNC_ADD, 1, [Define to 1 if you have __sync_add_and_fetch(int *, int) and friends.])
+fi
+
 
 #
 # Pthreads
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index ec4da20..c8c0b91 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -40,7 +40,7 @@
 #include "pgstat.h"
 #include "storage/proc.h"
 #include "storage/sinvaladt.h"
-#include "storage/spin.h"
+#include "storage/atomics.h"
 #include "storage/standby.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -234,7 +234,12 @@ static PROCLOCK *FastPathGetRelationLockEntry(LOCALLOCK *locallock);
 
 typedef struct
 {
+#ifdef MUTEXLESS_ATOMIC_INC
+#define FAST_PATH_MUTEX(data) NULL
+#else
+#define FAST_PATH_MUTEX(data) &(data)->mutex
 	slock_t		mutex;
+#endif
 	uint32		count[FAST_PATH_STRONG_LOCK_HASH_PARTITIONS];
 } FastPathStrongRelationLockData;
 
@@ -427,8 +432,10 @@ InitLocks(void)
 	FastPathStrongRelationLocks =
 		ShmemInitStruct("Fast Path Strong Relation Lock Data",
 						sizeof(FastPathStrongRelationLockData), &found);
+#ifndef MUTEXLESS_ATOMIC_INC
 	if (!found)
 		SpinLockInit(&FastPathStrongRelationLocks->mutex);
+#endif
 
 	/*
 	 * Allocate non-shared hash table for LOCALLOCK structs.  This stores lock
@@ -1207,11 +1214,8 @@ RemoveLocalLock(LOCALLOCK *locallock)
 
 		fasthashcode = FastPathStrongLockHashPartition(locallock->hashcode);
 
-		SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
-		Assert(FastPathStrongRelationLocks->count[fasthashcode] > 0);
-		FastPathStrongRelationLocks->count[fasthashcode]--;
 		locallock->holdsStrongLockCount = FALSE;
-		SpinLockRelease(&FastPathStrongRelationLocks->mutex);
+		atomic_dec(&FastPathStrongRelationLocks->count[fasthashcode], FAST_PATH_MUTEX(FastPathStrongRelationLocks));
 	}
 
 	if (!hash_search(LockMethodLocalHash,
@@ -1475,16 +1479,10 @@ BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode)
-	 * Adding to a memory location is not atomic, so we take a spinlock to
-	 * ensure we don't collide with someone else trying to bump the count at
-	 * the same time.
-	 *
-	 * XXX: It might be worth considering using an atomic fetch-and-add
-	 * instruction here, on architectures where that is supported.
+	 * Bump the count with an atomic increment (or, on platforms without
+	 * atomic support, a spinlock-protected increment), so we don't collide
+	 * with someone else trying to bump the count at the same time.
 	 */
-
-	SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
-	FastPathStrongRelationLocks->count[fasthashcode]++;
 	locallock->holdsStrongLockCount = TRUE;
 	StrongLockInProgress = locallock;
-	SpinLockRelease(&FastPathStrongRelationLocks->mutex);
+	atomic_inc(&FastPathStrongRelationLocks->count[fasthashcode], FAST_PATH_MUTEX(FastPathStrongRelationLocks));
 }
 
 /*
@@ -1512,11 +1510,9 @@ AbortStrongLockAcquire(void)
 
 	fasthashcode = FastPathStrongLockHashPartition(locallock->hashcode);
 	Assert(locallock->holdsStrongLockCount == TRUE);
-	SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
-	FastPathStrongRelationLocks->count[fasthashcode]--;
 	locallock->holdsStrongLockCount = FALSE;
 	StrongLockInProgress = NULL;
-	SpinLockRelease(&FastPathStrongRelationLocks->mutex);
+	atomic_dec(&FastPathStrongRelationLocks->count[fasthashcode], FAST_PATH_MUTEX(FastPathStrongRelationLocks));
 }
 
 /*
@@ -2881,9 +2877,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
 	{
 		uint32		fasthashcode = FastPathStrongLockHashPartition(hashcode);
 
-		SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
-		FastPathStrongRelationLocks->count[fasthashcode]--;
-		SpinLockRelease(&FastPathStrongRelationLocks->mutex);
+		atomic_dec(&FastPathStrongRelationLocks->count[fasthashcode], FAST_PATH_MUTEX(FastPathStrongRelationLocks));
 	}
 }
 
@@ -3765,9 +3759,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 	{
 		uint32		fasthashcode = FastPathStrongLockHashPartition(hashcode);
 
-		SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
-		FastPathStrongRelationLocks->count[fasthashcode]++;
-		SpinLockRelease(&FastPathStrongRelationLocks->mutex);
+		atomic_inc(&FastPathStrongRelationLocks->count[fasthashcode], FAST_PATH_MUTEX(FastPathStrongRelationLocks));
 	}
 
 	LWLockRelease(partitionLock);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index edaf853..e6b2943 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -176,6 +176,12 @@
 /* Define to 1 if you have __sync_lock_test_and_set(int *) and friends. */
 #undef HAVE_GCC_INT_ATOMICS
 
+/* Define to 1 if you have __atomic_add_fetch(int *, int, int) and friends. */
+#undef HAVE_GCC_INT_ATOMIC_ADD
+
+/* Define to 1 if you have __sync_add_and_fetch(int *, int) and friends. */
+#undef HAVE_GCC_INT_SYNC_ADD
+
 /* Define to 1 if you have the `getaddrinfo' function. */
 #undef HAVE_GETADDRINFO
 
diff --git a/src/include/storage/atomics.h b/src/include/storage/atomics.h
new file mode 100644
index 0000000..b828942
--- /dev/null
+++ b/src/include/storage/atomics.h
@@ -0,0 +1,72 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.h
+ *	   Hardware-independent implementation of atomics instructions
+ *	   on primitive values.
+ *
+ *
+ *
+ * Portions Copyright (c) 2012, PostgreSQL Global Development Group
+ *
+ * src/include/storage/atomics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ATOMICS_H
+#define ATOMICS_H
+
+#include "storage/spin.h"
+
+#ifdef HAVE_GCC_INT_ATOMIC_ADD
+
+#define MUTEXLESS_ATOMIC_INC 1
+
+static __inline__ void
+atomic_inc(volatile uint32 *counter, void* ignored)
+{
+		__atomic_add_fetch(counter, 1, __ATOMIC_SEQ_CST);
+}
+
+static __inline__ void
+atomic_dec(volatile uint32 *counter, void* ignored)
+{
+		__atomic_sub_fetch(counter, 1, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(HAVE_GCC_INT_SYNC_ADD)
+
+#define MUTEXLESS_ATOMIC_INC 1
+
+static __inline__ void
+atomic_inc(volatile uint32 *counter, void *ignored)
+{
+		__sync_add_and_fetch(counter, 1);
+}
+
+static __inline__ void
+atomic_dec(volatile uint32 *counter, void *ignored)
+{
+		__sync_add_and_fetch(counter, -1);
+}
+
+#else /* !(HAVE_GCC_INT_ATOMIC_ADD || HAVE_GCC_INT_SYNC_ADD) */
+
+static __inline__ void
+atomic_inc(volatile uint32 *counter, volatile slock_t *mutex)
+{
+		SpinLockAcquire(mutex);
+		(*counter)++;
+		SpinLockRelease(mutex);
+}
+
+static __inline__ void
+atomic_dec(volatile uint32 *counter, volatile slock_t *mutex)
+{
+		SpinLockAcquire(mutex);
+		(*counter)--;
+		SpinLockRelease(mutex);
+}
+
+#endif /* HAVE_GCC_INT_ATOMIC_ADD || HAVE_GCC_INT_SYNC_ADD */
+
+#endif   /* ATOMICS_H */
-- 
1.8.0.2

