From 0b40b322652f5f7f538381a4f299deb94f465c94 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathandbossart@gmail.com>
Date: Thu, 15 Dec 2022 14:11:43 -0800
Subject: [PATCH v9 2/3] handle race condition when restarting wal receivers

---
 src/backend/postmaster/postmaster.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index f459dab360..4107a85b51 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1601,9 +1601,9 @@ checkControlFile(void)
  *
  * In normal conditions we wait at most one minute, to ensure that the other
  * background tasks handled by ServerLoop get done even when no requests are
- * arriving.  However, if there are background workers waiting to be started,
- * we don't actually sleep so that they are quickly serviced.  Other exception
- * cases are as shown in the code.
+ * arriving.  However, if there are background workers or a WAL receiver
+ * waiting to be started, we make sure they are quickly serviced.  Other
+ * exception cases are as shown in the code.
  */
 static void
 DetermineSleepTime(struct timeval *timeout)
@@ -1611,11 +1611,12 @@ DetermineSleepTime(struct timeval *timeout)
 	TimestampTz next_wakeup = 0;
 
 	/*
-	 * Normal case: either there are no background workers at all, or we're in
-	 * a shutdown sequence (during which we ignore bgworkers altogether).
+	 * Normal case: either there are no background workers and no WAL receiver
+	 * waiting to be started, or we're in a shutdown sequence (during which we
+	 * ignore both of them altogether).
 	 */
 	if (Shutdown > NoShutdown ||
-		(!StartWorkerNeeded && !HaveCrashedWorker))
+		(!StartWorkerNeeded && !HaveCrashedWorker && !WalReceiverRequested))
 	{
 		if (AbortStartTime != 0)
 		{
@@ -1640,6 +1641,17 @@ DetermineSleepTime(struct timeval *timeout)
 		return;
 	}
 
+	/*
+	 * We're probably waiting for SIGCHLD before starting the WAL receiver.  We
+	 * don't expect that to take long.
+	 */
+	if (WalReceiverRequested)
+	{
+		timeout->tv_sec = 0;
+		timeout->tv_usec = 100000;	/* 100ms */
+		return;
+	}
+
 	if (HaveCrashedWorker)
 	{
 		slist_mutable_iter siter;
-- 
2.25.1

