diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce659ec..d726a98 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2066,23 +2066,42 @@ XLogFlush(XLogRecPtr record)
 	/* initialize to given target; may increase below */
 	WriteRqstPtr = record;
 
-	/* read LogwrtResult and update local state */
+	/*
+	 * Now wait until we get the write lock, or someone else does the
+	 * flush for us.
+	 */
+	for (;;)
 	{
 		/* use volatile pointer to prevent code rearrangement */
 		volatile XLogCtlData *xlogctl = XLogCtl;
 
+		/* read LogwrtResult and update local state */
 		SpinLockAcquire(&xlogctl->info_lck);
 		if (XLByteLT(WriteRqstPtr, xlogctl->LogwrtRqst.Write))
 			WriteRqstPtr = xlogctl->LogwrtRqst.Write;
 		LogwrtResult = xlogctl->LogwrtResult;
 		SpinLockRelease(&xlogctl->info_lck);
-	}
 
-	/* done already? */
-	if (!XLByteLE(record, LogwrtResult.Flush))
-	{
-		/* now wait for the write lock */
-		LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+		/* done already? */
+		if (XLByteLE(record, LogwrtResult.Flush))
+			break;
+
+		/*
+		 * Try to get the write lock. If we can't get it immediately, wait
+		 * until it's released, but before actually acquiring it, loop back
+		 * to check if the backend holding the lock did the flush for us
+		 * already. This helps to maintain a good rate of group committing
+		 * when the system is bottlenecked by the speed of fsyncing.
+		 */
+		if (!LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE_BUT_WAIT))
+		{
+			/*
+			 * Didn't get the lock straight away, but we might be done
+			 * already
+			 */
+			continue;
+		}
+		/* Got the lock */
 		LogwrtResult = XLogCtl->Write.LogwrtResult;
 		if (!XLByteLE(record, LogwrtResult.Flush))
 		{
@@ -2111,6 +2130,8 @@ XLogFlush(XLogRecPtr record)
 			XLogWrite(WriteRqst, false, false);
 		}
 		LWLockRelease(WALWriteLock);
+		/* done */
+		break;
 	}
 
 	END_CRIT_SECTION();
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index cc41568..488f573 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -430,6 +430,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 			elog(PANIC, "cannot wait without a PGPROC structure");
 
 		proc->lwWaiting = true;
+		proc->lwWaitOnly = false;
 		proc->lwExclusive = (mode == LW_EXCLUSIVE);
 		proc->lwWaitLink = NULL;
 		if (lock->head == NULL)
@@ -496,7 +497,9 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 /*
  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
  *
- * If the lock is not available, return FALSE with no side-effects.
+ * If the lock is not available, return FALSE with no side-effects. In
+ * LW_EXCLUSIVE_BUT_WAIT mode, if the lock is not available, waits until it
+ * is available, but then returns false without acquiring it.
  *
  * If successful, cancel/die interrupts are held off until lock release.
  */
@@ -504,7 +507,9 @@ bool
 LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 {
 	volatile LWLock *lock = &(LWLockArray[lockid].lock);
+	PGPROC	   *proc = MyProc;
 	bool		mustwait;
+	int			extraWaits = 0;
 
 	PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
 
@@ -523,7 +528,7 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 	SpinLockAcquire(&lock->mutex);
 
 	/* If I can get the lock, do so quickly. */
-	if (mode == LW_EXCLUSIVE)
+	if (mode == LW_EXCLUSIVE || mode == LW_EXCLUSIVE_BUT_WAIT)
 	{
 		if (lock->exclusive == 0 && lock->shared == 0)
 		{
@@ -544,8 +549,77 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 			mustwait = true;
 	}
 
-	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
+	if (mustwait && mode == LW_EXCLUSIVE_BUT_WAIT)
+	{
+		/*
+		 * Add myself to wait queue.
+		 *
+		 * If we don't have a PGPROC structure, there's no way to wait. This
+		 * should never occur, since MyProc should only be null during shared
+		 * memory initialization.
+		 */
+		if (proc == NULL)
+			elog(PANIC, "cannot wait without a PGPROC structure");
+
+		proc->lwWaiting = true;
+		proc->lwWaitOnly = true;
+		proc->lwExclusive = (mode == LW_EXCLUSIVE);
+		proc->lwWaitLink = NULL;
+		if (lock->head == NULL)
+			lock->head = proc;
+		else
+			lock->tail->lwWaitLink = proc;
+		lock->tail = proc;
+
+		/* Can release the mutex now */
+		SpinLockRelease(&lock->mutex);
+
+		/*
+		 * Wait until awakened.
+		 *
+		 * Since we share the process wait semaphore with the regular lock
+		 * manager and ProcWaitForSignal, and we may need to acquire an LWLock
+		 * while one of those is pending, it is possible that we get awakened
+		 * for a reason other than being signaled by LWLockRelease. If so,
+		 * loop back and wait again.  Once we've gotten the LWLock,
+		 * re-increment the sema by the number of additional signals received,
+		 * so that the lock manager or signal manager will see the received
+		 * signal when it next waits.
+		 */
+		LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");
+
+#ifdef LWLOCK_STATS
+		block_counts[lockid]++;
+#endif
+
+		TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+
+		for (;;)
+		{
+			/* "false" means cannot accept cancel/die interrupt here. */
+			PGSemaphoreLock(&proc->sem, false);
+			if (!proc->lwWaiting)
+				break;
+			extraWaits++;
+		}
+
+		TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+
+		LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
+	}
+	else
+	{
+		/* We are done updating shared state of the lock itself. */
+		SpinLockRelease(&lock->mutex);
+	}
+
+	TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);
+
+	/*
+	 * Fix the process wait semaphore's count for any absorbed wakeups.
+	 */
+	while (extraWaits-- > 0)
+		PGSemaphoreUnlock(&proc->sem);
 
 	if (mustwait)
 	{
@@ -619,19 +693,29 @@ LWLockRelease(LWLockId lockid)
 			 * Remove the to-be-awakened PGPROCs from the queue.  If the front
 			 * waiter wants exclusive lock, awaken him only. Otherwise awaken
 			 * as many waiters as want shared access.
+			 *
+			 * Everyone marked with lwWaitOnly are woken up, but they don't
+			 * affect the setting of releaseOK. They're woken up, but they
+			 * might decide to not acquire the lock after all.
 			 */
 			proc = head;
-			if (!proc->lwExclusive)
+			if (!proc->lwExclusive || proc->lwWaitOnly)
 			{
 				while (proc->lwWaitLink != NULL &&
-					   !proc->lwWaitLink->lwExclusive)
+					   (proc->lwWaitLink->lwWaitOnly ||
+						!proc->lwWaitLink->lwExclusive))
+				{
 					proc = proc->lwWaitLink;
+					if (!proc->lwWaitOnly)
+						lock->releaseOK = false;
+				}
 			}
 			/* proc is now the last PGPROC to be released */
 			lock->head = proc->lwWaitLink;
 			proc->lwWaitLink = NULL;
 			/* prevent additional wakeups until retryer gets to run */
-			lock->releaseOK = false;
+			if (!proc->lwWaitOnly)
+				lock->releaseOK = false;
 		}
 		else
 		{
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index df3df29..85c9a5c 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -94,7 +94,8 @@ typedef enum LWLockId
 typedef enum LWLockMode
 {
 	LW_EXCLUSIVE,
-	LW_SHARED
+	LW_SHARED,
+	LW_EXCLUSIVE_BUT_WAIT
 } LWLockMode;
 
 
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 358d1a4..027a793 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -102,6 +102,7 @@ struct PGPROC
 	/* Info about LWLock the process is currently waiting for, if any. */
 	bool		lwWaiting;		/* true if waiting for an LW lock */
 	bool		lwExclusive;	/* true if waiting for exclusive access */
+	bool		lwWaitOnly;		/* true if just want to wake up on release */
 	struct PGPROC *lwWaitLink;	/* next waiter for same LW lock */
 
 	/* Info about lock the process is currently waiting for, if any. */
