diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 048a189..7eae2a1 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -471,6 +471,9 @@ typedef struct
 	slock_t    *ShmemLock;
 	VariableCache ShmemVariableCache;
 	Backend    *ShmemBackendArray;
+#ifndef HAVE_SPINLOCKS
+	PGSemaphore	SpinlockSemaArray;
+#endif
 	LWLock	   *LWLockArray;
 	slock_t    *ProcStructLock;
 	PROC_HDR   *ProcGlobal;
@@ -5626,6 +5629,9 @@ save_backend_variables(BackendParameters *param, Port *port,
 	param->ShmemVariableCache = ShmemVariableCache;
 	param->ShmemBackendArray = ShmemBackendArray;
 
+#ifndef HAVE_SPINLOCKS
+	param->SpinlockSemaArray = SpinlockSemaArray;
+#endif
 	param->LWLockArray = LWLockArray;
 	param->ProcStructLock = ProcStructLock;
 	param->ProcGlobal = ProcGlobal;
@@ -5854,6 +5860,9 @@ restore_backend_variables(BackendParameters *param, Port *port)
 	ShmemVariableCache = param->ShmemVariableCache;
 	ShmemBackendArray = param->ShmemBackendArray;
 
+#ifndef HAVE_SPINLOCKS
+	SpinlockSemaArray = param->SpinlockSemaArray;
+#endif
 	LWLockArray = param->LWLockArray;
 	ProcStructLock = param->ProcStructLock;
 	ProcGlobal = param->ProcGlobal;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 040c7aa..ad663ec 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -105,6 +105,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 		 * need to be so careful during the actual allocation phase.
 		 */
 		size = 100000;
+		size = add_size(size, SpinlockSemaSize());
 		size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
 												 sizeof(ShmemIndexEnt)));
 		size = add_size(size, BufferShmemSize());
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 18ba426..4efd0fb 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -116,9 +116,24 @@ InitShmemAllocation(void)
 	Assert(shmhdr != NULL);
 
 	/*
-	 * Initialize the spinlock used by ShmemAlloc.	We have to do the space
-	 * allocation the hard way, since obviously ShmemAlloc can't be called
-	 * yet.
+	 * If spinlocks are disabled, initialize emulation layer.  We have to do
+	 * the space allocation the hard way, since obviously ShmemAlloc can't be
+	 * called yet.
+	 */
+#ifndef HAVE_SPINLOCKS
+	{
+		PGSemaphore spinsemas;
+
+		spinsemas = (PGSemaphore) (((char *) shmhdr) + shmhdr->freeoffset);
+		shmhdr->freeoffset += MAXALIGN(SpinlockSemaSize());
+		SpinlockSemaInit(spinsemas);
+		Assert(shmhdr->freeoffset <= shmhdr->totalsize);
+	}
+#endif
+
+	/*
+	 * Initialize the spinlock used by ShmemAlloc; we have to do this the hard
+	 * way, too, for the same reasons as above.
 	 */
 	ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
 	shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index f054be8..e108a37 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -28,6 +28,21 @@
 #include "storage/lwlock.h"
 #include "storage/spin.h"
 
+#ifdef USE_ASSERT_CHECKING
+bool any_spinlock_held;
+#endif
+
+PGSemaphore SpinlockSemaArray;
+
+/*
+ * Report the amount of shared memory needed to store semaphores for spinlock
+ * support.
+ */
+Size
+SpinlockSemaSize(void)
+{
+	return SpinlockSemas() * sizeof(PGSemaphoreData);
+}
 
 #ifdef HAVE_SPINLOCKS
 
@@ -43,8 +58,17 @@ SpinlockSemas(void)
 
 /*
  * No TAS, so spinlocks are implemented as PGSemaphores.
+ *
+ * In order to avoid requesting too many semaphores from the operating system,
+ * we multiplex however many spinlocks exist across a smaller number of OS
+ * semaphores.  This works reasonably well as long as the number of concurrent
+ * processes is small enough to make it unlikely that two processes will try
+ * to grab two different spinlocks that map to the same semaphore at the same
+ * time.  If this is not the case in your environment, you can raise this
+ * constant or, better yet, supply a real spinlock implementation for your
+ * platform.
  */
-
+#define	NUM_SPINLOCK_SEMAPHORES		1024
 
 /*
  * Report number of semaphores needed to support spinlocks.
@@ -52,22 +76,20 @@ SpinlockSemas(void)
 int
 SpinlockSemas(void)
 {
-	int			nsemas;
-
-	/*
-	 * It would be cleaner to distribute this logic into the affected modules,
-	 * similar to the way shmem space estimation is handled.
-	 *
-	 * For now, though, there are few enough users of spinlocks that we just
-	 * keep the knowledge here.
-	 */
-	nsemas = NumLWLocks();		/* one for each lwlock */
-	nsemas += NBuffers;			/* one for each buffer header */
-	nsemas += max_wal_senders;	/* one for each wal sender process */
-	nsemas += num_xloginsert_slots; /* one for each WAL insertion slot */
-	nsemas += 30;				/* plus a bunch for other small-scale use */
-
-	return nsemas;
+	return NUM_SPINLOCK_SEMAPHORES;
+}
+
+/*
+ * Initialize semaphores.
+ */
+void
+SpinlockSemaInit(PGSemaphore spinsemas)
+{
+	int	i;
+
+	for (i = 0; i < NUM_SPINLOCK_SEMAPHORES; ++i)
+		PGSemaphoreCreate(&spinsemas[i]);
+	SpinlockSemaArray = spinsemas;
 }
 
 /*
@@ -77,13 +99,15 @@ SpinlockSemas(void)
 void
 s_init_lock_sema(volatile slock_t *lock)
 {
-	PGSemaphoreCreate((PGSemaphore) lock);
+	static int counter = 0;
+
+	*lock = (++counter) % NUM_SPINLOCK_SEMAPHORES;
 }
 
 void
 s_unlock_sema(volatile slock_t *lock)
 {
-	PGSemaphoreUnlock((PGSemaphore) lock);
+	PGSemaphoreUnlock(&SpinlockSemaArray[*lock]);
 }
 
 bool
@@ -98,7 +122,7 @@ int
 tas_sema(volatile slock_t *lock)
 {
 	/* Note that TAS macros return 0 if *success* */
-	return !PGSemaphoreTryLock((PGSemaphore) lock);
+	return !PGSemaphoreTryLock(&SpinlockSemaArray[*lock]);
 }
 
 #endif   /* !HAVE_SPINLOCKS */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7dcd5d9..6adecbb 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -915,7 +915,7 @@ spin_delay(void)
  * to fall foul of kernel limits on number of semaphores, so don't use this
  * unless you must!  The subroutines appear in spin.c.
  */
-typedef PGSemaphoreData slock_t;
+typedef int slock_t;
 
 extern bool s_lock_free_sema(volatile slock_t *lock);
 extern void s_unlock_sema(volatile slock_t *lock);
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index f459b90..6193a8c 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -60,14 +60,35 @@
 
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
+#define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
+/*
+ * Spinlocks should only be acquired in straight-line code that does not
+ * loop, so no code should ever acquire a second spinlock while already
+ * holding one.  This is particularly important when --disable-spinlocks is
+ * in use, because in that case we map all of the spinlocks in the system
+ * onto a smaller number of semaphores allocated from the OS; attempting
+ * to acquire more than one at a time could self-deadlock.
+ */
+#ifdef USE_ASSERT_CHECKING
+extern bool any_spinlock_held;
+#define SpinLockAcquire(lock) \
+	do { Assert(!any_spinlock_held); any_spinlock_held = true; \
+		S_LOCK(lock); } while (0)
+#define SpinLockRelease(lock) \
+	do { Assert(any_spinlock_held); any_spinlock_held = false; \
+		S_UNLOCK(lock); } while (0)
+#else
 #define SpinLockAcquire(lock) S_LOCK(lock)
-
 #define SpinLockRelease(lock) S_UNLOCK(lock)
-
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
-
+#endif
 
 extern int	SpinlockSemas(void);
+extern Size	SpinlockSemaSize(void);
+
+#ifndef HAVE_SPINLOCKS
+extern void	SpinlockSemaInit(PGSemaphore spinsemas);
+extern PGSemaphore	SpinlockSemaArray;
+#endif
 
 #endif   /* SPIN_H */
