>From 162f66f7fccc335d8caad7bb15be1c2030ec838e Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 14 Jan 2016 15:15:17 +0100
Subject: [PATCH 3/4] Only clear unix_latch.c's self-pipe if it actually
 contains data.

This avoids a good number of, individually quite fast, system calls in
scenarios with many quick queries. Besides the aesthetic benefit of
seing fewer superflous system calls with strace, it also improves
performance by ~2% measured by pgbench -M prepared -c 96 -j 8 -S (scale
100).
---
 src/backend/port/unix_latch.c | 77 ++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c
index ad621ea..03bca68 100644
--- a/src/backend/port/unix_latch.c
+++ b/src/backend/port/unix_latch.c
@@ -283,27 +283,27 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 	do
 	{
 		/*
-		 * Clear the pipe, then check if the latch is set already. If someone
-		 * sets the latch between this and the poll()/select() below, the
-		 * setter will write a byte to the pipe (or signal us and the signal
-		 * handler will do that), and the poll()/select() will return
-		 * immediately.
+		 * Check if the latch is set already. If so, leave loop immediately,
+		 * avoid blocking again. We don't attempt to report any other events
+		 * that might also be satisfied.
+		 *
+		 * If someone sets the latch between this and the poll()/select()
+		 * below, the setter will write a byte to the pipe (or signal us and
+		 * the signal handler will do that), and the poll()/select() will
+		 * return immediately.
+		 *
+		 * If there's a pending byte in the self pipe, we'll notice whenever
+		 * blocking. Only clearing the pipe in that case avoids having to
+		 * drain it everytime WaitLatchOrSocket() is used.
 		 *
 		 * Note: we assume that the kernel calls involved in drainSelfPipe()
 		 * and SetLatch() will provide adequate synchronization on machines
 		 * with weak memory ordering, so that we cannot miss seeing is_set if
 		 * the signal byte is already in the pipe when we drain it.
 		 */
-		drainSelfPipe();
-
 		if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
 		{
 			result |= WL_LATCH_SET;
-
-			/*
-			 * Leave loop immediately, avoid blocking again. We don't attempt
-			 * to report any other events that might also be satisfied.
-			 */
 			break;
 		}
 
@@ -313,24 +313,26 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		 */
 #if defined(LATCH_USE_POLL)
 		nfds = 0;
+
+		/* selfpipe is always in pfds[0] */
+		pfds[0].fd = selfpipe_readfd;
+		pfds[0].events = POLLIN;
+		pfds[0].revents = 0;
+		nfds++;
+
 		if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
 		{
-			/* socket, if used, is always in pfds[0] */
-			pfds[0].fd = sock;
-			pfds[0].events = 0;
+			/* socket, if used, is always in pfds[1] */
+			pfds[1].fd = sock;
+			pfds[1].events = 0;
 			if (wakeEvents & WL_SOCKET_READABLE)
-				pfds[0].events |= POLLIN;
+				pfds[1].events |= POLLIN;
 			if (wakeEvents & WL_SOCKET_WRITEABLE)
-				pfds[0].events |= POLLOUT;
-			pfds[0].revents = 0;
+				pfds[1].events |= POLLOUT;
+			pfds[1].revents = 0;
 			nfds++;
 		}
 
-		pfds[nfds].fd = selfpipe_readfd;
-		pfds[nfds].events = POLLIN;
-		pfds[nfds].revents = 0;
-		nfds++;
-
 		if (wakeEvents & WL_POSTMASTER_DEATH)
 		{
 			/* postmaster fd, if used, is always in pfds[nfds - 1] */
@@ -364,19 +366,26 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		else
 		{
 			/* at least one event occurred, so check revents values */
+
+			if (pfds[0].revents & POLLIN)
+			{
+				/* There's data in the self-pipe, clear it. */
+				drainSelfPipe();
+			}
+
 			if ((wakeEvents & WL_SOCKET_READABLE) &&
-				(pfds[0].revents & POLLIN))
+				(pfds[1].revents & POLLIN))
 			{
 				/* data available in socket, or EOF/error condition */
 				result |= WL_SOCKET_READABLE;
 			}
 			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
-				(pfds[0].revents & POLLOUT))
+				(pfds[1].revents & POLLOUT))
 			{
 				/* socket is writable */
 				result |= WL_SOCKET_WRITEABLE;
 			}
-			if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+			if (pfds[1].revents & (POLLHUP | POLLERR | POLLNVAL))
 			{
 				/* EOF/error condition */
 				if (wakeEvents & WL_SOCKET_READABLE)
@@ -468,6 +477,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		else
 		{
 			/* at least one event occurred, so check masks */
+			if (FD_ISSET(selfpipe_readfd, &input_mask))
+			{
+				/* There's data in the self-pipe, clear it. */
+				drainSelfPipe();
+			}
 			if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
 			{
 				/* data available in socket, or EOF */
@@ -498,6 +512,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		}
 #endif   /* LATCH_USE_SELECT */
 
+		/*
+		 * Check again wether latch is set, the arrival of a signal/self-byte
+		 * might be what stopped our sleep. It's not required for correctness
+		 * to signal the latch as being set (we'd just loop if there's no
+		 * other event), but it seems good to report an arrived latch asap.
+		 */
+		if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
+		{
+			result |= WL_LATCH_SET;
+		}
+
 		/* If we're not done, update cur_timeout for next iteration */
 		if (result == 0 && (wakeEvents & WL_TIMEOUT))
 		{
-- 
2.5.0.400.gff86faf.dirty

