From 50a3e62b16b752837f5a388e3259fee2bca6c2ab Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sat, 24 Jun 2017 16:46:11 -0700
Subject: [PATCH 1/3] Don't signal latch owners if owner not waiting on latch.

Doing so can generate a lot of useless syscalls and context switches
in cases where the owner of the latch is busy doing actual work.

Discussion: https://postgr.es/m/CAMkU=1z6_K4mE12vNirgV5qnu58+cZQdNxD+Pb5bzxnvOVVWoA@mail.gmail.com
---
 src/backend/storage/ipc/latch.c | 62 ++++++++++++++++++++++++++++++++++-------
 src/include/storage/latch.h     |  1 +
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 07b1364de8..c1a1a76d0a 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -221,6 +221,7 @@ InitLatch(volatile Latch *latch)
 {
 	latch->is_set = false;
 	latch->owner_pid = MyProcPid;
+	latch->owner_waiting = false;
 	latch->is_shared = false;
 
 #ifndef WIN32
@@ -268,6 +269,7 @@ InitSharedLatch(volatile Latch *latch)
 
 	latch->is_set = false;
 	latch->owner_pid = 0;
+	latch->owner_waiting = false;
 	latch->is_shared = true;
 }
 
@@ -311,6 +313,7 @@ DisownLatch(volatile Latch *latch)
 	Assert(latch->owner_pid == MyProcPid);
 
 	latch->owner_pid = 0;
+	latch->owner_waiting = false; /* paranoia */
 }
 
 /*
@@ -466,7 +469,17 @@ SetLatch(volatile Latch *latch)
 			sendSelfPipeByte();
 	}
 	else
-		kill(owner_pid, SIGUSR1);
+	{
+		/*
+		 * If the owner of the latch is currently waiting on it, send signal
+		 * to wake up that process.  The read-barrier is necessary so we see
+		 * an up-to-date value, and it pairs with the memory barrier in the
+		 * path setting owner_waiting in WaitEventSetWait.
+		 */
+		pg_read_barrier();
+		if (latch->owner_waiting)
+			kill(owner_pid, SIGUSR1);
+	}
 #else
 
 	/*
@@ -954,6 +967,12 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 		 * immediately, avoid blocking again. We don't attempt to report any
 		 * other events that might also be satisfied.
 		 *
+		 * To avoid other processes signalling us if we're not waiting,
+		 * wasting context switches, we first check whether the latch is
+		 * already set, and only enable signalling from other processes after
+		 * that.  To avoid a race, we've to recheck whether the latch is set
+		 * after asking to be woken up.
+		 *
 		 * If someone sets the latch between this and the
 		 * WaitEventSetWaitBlock() below, the setter will write a byte to the
 		 * pipe (or signal us and the signal handler will do that), and the
@@ -976,17 +995,37 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 		 * ordering, so that we cannot miss seeing is_set if a notification
 		 * has already been queued.
 		 */
-		if (set->latch && set->latch->is_set)
+		if (set->latch)
 		{
-			occurred_events->fd = PGINVALID_SOCKET;
-			occurred_events->pos = set->latch_pos;
-			occurred_events->user_data =
-				set->events[set->latch_pos].user_data;
-			occurred_events->events = WL_LATCH_SET;
-			occurred_events++;
-			returned_events++;
+			bool latch_set = false;
+
+			if (set->latch->is_set)
+			{
+				latch_set = true;
+			}
+			else
+			{
+				set->latch->owner_waiting = true;
+				pg_memory_barrier();
+				if (set->latch->is_set)
+				{
+					latch_set = true;
+				}
+			}
+
+			if (latch_set)
+			{
+				occurred_events->fd = PGINVALID_SOCKET;
+				occurred_events->pos = set->latch_pos;
+				occurred_events->user_data =
+					set->events[set->latch_pos].user_data;
+				occurred_events->events = WL_LATCH_SET;
+				occurred_events++;
+				returned_events++;
+
+				break;
+			}
 
-			break;
 		}
 
 		/*
@@ -1016,6 +1055,9 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 	waiting = false;
 #endif
 
+	if (set->latch)
+		set->latch->owner_waiting = false;
+
 	pgstat_report_wait_end();
 
 	return returned_events;
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index 73abfafec5..442344d914 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -112,6 +112,7 @@ typedef struct Latch
 	sig_atomic_t is_set;
 	bool		is_shared;
 	int			owner_pid;
+	int			owner_waiting; /* int, so all platforms can set atomically */
 #ifdef WIN32
 	HANDLE		event;
 #endif
-- 
2.13.1.392.g8d1b10321b.dirty

