From 733fd375578d8216fed9a81d4a1a575b1f542ca9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 1 Apr 2023 12:36:12 +1300
Subject: [PATCH] Teach WaitEventSetWait to report multiple events on Windows.

The WaitEventSet implementation on Windows has always reported only one
event at a time, and always the "lowest" in its event array.  Since
commit 7389aad6 started using WaitEventSet to handle incoming socket
connections, this unfairness might potentially upset someone who wants
to handle incoming connections on multiple sockets.  If one of them has a
non-empty listen queue due to incoming connections, the other might
never be serviced.  The previous coding based on select() was fair in
that way.

Fix, by teaching WaitEventSetWait() to poll for extra events.  No change
in behavior in the common case of callers with nevents=1, but for the
postmaster's main loop, we'll drain all the events that can fit in the
output buffer, which is deliberately set large enough to handle the
maximum possible number of sockets.  This brings the Windows behavior in
line with Unix.
---
 src/backend/storage/ipc/latch.c | 34 +++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index f4123e7de7..cc7b572008 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -2025,6 +2025,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 	 */
 	cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];
 
+loop:
+
 	occurred_events->pos = cur_event->pos;
 	occurred_events->user_data = cur_event->user_data;
 	occurred_events->events = 0;
@@ -2044,6 +2046,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 			occurred_events->events = WL_LATCH_SET;
 			occurred_events++;
 			returned_events++;
+			nevents--;
 		}
 	}
 	else if (cur_event->events == WL_POSTMASTER_DEATH)
@@ -2063,6 +2066,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 			occurred_events->events = WL_POSTMASTER_DEATH;
 			occurred_events++;
 			returned_events++;
+			nevents--;
 		}
 	}
 	else if (cur_event->events & WL_SOCKET_MASK)
@@ -2124,6 +2128,36 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 		{
 			occurred_events++;
 			returned_events++;
+			nevents--;
+		}
+	}
+
+	/*
+	 * Do we have space to report more events that might also be signaled later
+	 * in the array than cur_event?  Being able to return multiple socket
+	 * events at a time like the Unix implementations might be important for
+	 * client code that wants to be able to service busy sockets fairly.
+	 */
+	if (nevents > 0)
+	{
+		int			next_pos = cur_event->pos + 1;
+		int			count = set->nevents - next_pos;
+
+		if (count > 0)
+		{
+			/*
+			 * Poll with zero timeout, and ignore errors now because we
+			 * already have events to report.
+			 */
+			rc = WaitForMultipleObjects(count,
+										set->handles + next_pos + 1,
+										false,
+										0);
+			if (rc >= WAIT_OBJECT_0 && rc < WAIT_OBJECT_0 + count)
+			{
+				cur_event = &set->events[next_pos + rc];
+				goto loop;
+			}
 		}
 	}
 
-- 
2.40.0

