Reducing walreceiver latency with a latch
Now that we have the wonderful latch facility, let's use it to reduce
the delay between receiving a piece of WAL and applying it in the standby.
Currently, the startup process polls every 100 ms to see if new WAL has
arrived, which adds an average 50 ms delay between a transaction
committing in the master and it appearing as committed in a hot standby
server. The latch patch already eliminated a similar polling delay in
walsender; the attached patch does the same for walreceiver.
After this patch, there are no unnecessary delays in the streaming
replication code path. Note that this is all still asynchronous, just
with reduced latency.
This is pretty straightforward, but any comments?
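In case it helps to see the pattern in isolation, here is a minimal sketch
of the handshake the patch sets up (not an excerpt from the patch -- the
example_* names are made up, but only latch calls that appear in the
attached diff are used):

#include "storage/latch.h"

static Latch *walLatch;		/* would point into shared memory, e.g. &WalRcv->receivedLatch */

/* once, at shared memory initialization time */
static void
example_init(void)
{
	InitSharedLatch(walLatch);
}

/* waiter side: the startup process */
static void
example_wait_for_wal(void)
{
	OwnLatch(walLatch);					/* only the owning process may wait */
	if (WaitLatch(walLatch, 5000000L))	/* wakes when set, or after 5 seconds */
		ResetLatch(walLatch);			/* re-arm before the next wait */
}

/* notifier side: walreceiver, right after fsyncing newly received WAL */
static void
example_new_wal_arrived(void)
{
	SetLatch(walLatch);					/* wakes the waiter immediately */
}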
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Attachments:
walreceiver-latch-1.patch (text/x-diff)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ddf7d79..40e1718 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -46,6 +46,7 @@
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
@@ -9139,6 +9140,13 @@ startupproc_quickdie(SIGNAL_ARGS)
}
+/* SIGUSR1: let latch facility handle the signal */
+static void
+StartupProcSigUsr1Handler(SIGNAL_ARGS)
+{
+ latch_sigusr1_handler();
+}
+
/* SIGHUP: set flag to re-read config file at next convenient time */
static void
StartupProcSigHupHandler(SIGNAL_ARGS)
@@ -9213,7 +9221,7 @@ StartupProcessMain(void)
else
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
- pqsignal(SIGUSR1, SIG_IGN);
+ pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
pqsignal(SIGUSR2, SIG_IGN);
/*
@@ -9397,16 +9405,13 @@ retry:
}
/*
- * Data not here yet, so check for trigger then sleep.
+ * Data not here yet, so check for trigger then sleep for
+ * five seconds like in the WAL file polling case below.
*/
if (CheckForStandbyTrigger())
goto triggered;
- /*
- * When streaming is active, we want to react quickly when
- * the next WAL record arrives, so sleep only a bit.
- */
- pg_usleep(100000L); /* 100ms */
+ WaitForWalArrival(5000000L);
}
else
{
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b868707..e12f1f5 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -529,6 +529,9 @@ XLogWalRcvFlush(void)
walrcv->receivedUpto = LogstreamResult.Flush;
SpinLockRelease(&walrcv->mutex);
+ /* Signal the startup process that new WAL has arrived */
+ SetLatch(&walrcv->receivedLatch);
+
/* Report XLOG streaming progress in PS display */
if (update_process_title)
{
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index b206885..8182160 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -64,6 +64,7 @@ WalRcvShmemInit(void)
MemSet(WalRcv, 0, WalRcvShmemSize());
WalRcv->walRcvState = WALRCV_STOPPED;
SpinLockInit(&WalRcv->mutex);
+ InitSharedLatch(&WalRcv->receivedLatch);
}
}
@@ -163,6 +164,9 @@ ShutdownWalRcv(void)
pg_usleep(100000); /* 100ms */
}
+
+ /* We don't need the latch anymore */
+ DisownLatch(&walrcv->receivedLatch);
}
/*
@@ -187,6 +191,9 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
if (recptr.xrecoff % XLogSegSize != 0)
recptr.xrecoff -= recptr.xrecoff % XLogSegSize;
+ /*
+ * Update shared memory status with information needed by walreceiver
+ */
SpinLockAcquire(&walrcv->mutex);
/* It better be stopped before we try to restart it */
@@ -204,6 +211,10 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
SpinLockRelease(&walrcv->mutex);
+ /* Take ownership of the latch so that we can wait on it */
+ OwnLatch(&walrcv->receivedLatch);
+
+ /* Request postmaster to start the walreceiver process */
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
}
@@ -229,3 +240,20 @@ GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart)
return recptr;
}
+
+/*
+ * Wait for more WAL to arrive, or timeout (in microseconds) to be reached
+ */
+void
+WaitForWalArrival(int timeout)
+{
+ /* Wait for more WAL to arrive */
+ if (WaitLatch(&WalRcv->receivedLatch, timeout))
+ {
+ /*
+	 * Reset the latch so that the next call to WaitForWalArrival will sleep
+ * again.
+ */
+ ResetLatch(&WalRcv->receivedLatch);
+ }
+}
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 2ea881e..66a8229 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -13,6 +13,7 @@
#define _WALRECEIVER_H
#include "access/xlogdefs.h"
+#include "storage/latch.h"
#include "storage/spin.h"
#include "pgtime.h"
@@ -72,6 +73,13 @@ typedef struct
char conninfo[MAXCONNINFO];
slock_t mutex; /* locks shared variables shown above */
+
+ /*
+ * Walreceiver sets this latch every time new WAL has been received and
+ * fsync'd to disk, allowing startup process to wait for new WAL to
+ * arrive.
+ */
+ Latch receivedLatch;
} WalRcvData;
extern WalRcvData *WalRcv;
@@ -92,8 +100,8 @@ extern Size WalRcvShmemSize(void);
extern void WalRcvShmemInit(void);
extern void ShutdownWalRcv(void);
extern bool WalRcvInProgress(void);
-extern XLogRecPtr WaitNextXLogAvailable(XLogRecPtr recptr, bool *finished);
extern void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo);
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart);
+extern void WaitForWalArrival(int timeout);
#endif /* _WALRECEIVER_H */
On 13 September 2010 12:40, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
Now that we have the wonderful latch facility, let's use it to reduce
the delay between receiving a piece of WAL and applying it in the standby.
Currently, the startup process polls every 100 ms to see if new WAL has
arrived, which adds an average 50 ms delay between a transaction
committing in the master and it appearing as committed in a hot standby
server. The latch patch already eliminated a similar polling delay in
walsender; the attached patch does the same for walreceiver.

After this patch, there are no unnecessary delays in the streaming
replication code path. Note that this is all still asynchronous, just
with reduced latency.

This is pretty straightforward, but any comments?
Is that supposed to be waiting 5000ms?
--
Thom Brown
Twitter: @darkixion
IRC (freenode): dark_ixion
Registered Linux user: #516935
On 13 September 2010 12:47, Thom Brown <thom@linux.com> wrote:
On 13 September 2010 12:40, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:

Now that we have the wonderful latch facility, let's use it to reduce
the delay between receiving a piece of WAL and applying it in the standby.
Currently, the startup process polls every 100 ms to see if new WAL has
arrived, which adds an average 50 ms delay between a transaction
committing in the master and it appearing as committed in a hot standby
server. The latch patch already eliminated a similar polling delay in
walsender; the attached patch does the same for walreceiver.

After this patch, there are no unnecessary delays in the streaming
replication code path. Note that this is all still asynchronous, just
with reduced latency.

This is pretty straightforward, but any comments?
Is that supposed to be waiting 5000ms?
Ignore me, I can see that it's right.
--
Thom Brown
Twitter: @darkixion
IRC (freenode): dark_ixion
Registered Linux user: #516935
On 13/09/10 14:47, Thom Brown wrote:
On 13 September 2010 12:40, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:

Now that we have the wonderful latch facility, let's use it to reduce
the delay between receiving a piece of WAL and applying it in the standby.
Currently, the startup process polls every 100 ms to see if new WAL has
arrived, which adds an average 50 ms delay between a transaction
committing in the master and it appearing as committed in a hot standby
server. The latch patch already eliminated a similar polling delay in
walsender; the attached patch does the same for walreceiver.

After this patch, there are no unnecessary delays in the streaming
replication code path. Note that this is all still asynchronous, just
with reduced latency.

This is pretty straightforward, but any comments?
Is that supposed to be waiting 5000ms?
Yes, it gets interrupted as soon as WAL arrives; that timeout is there to
poll for the standby trigger file to appear, or for SIGTERM.
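To spell out the semantics, here's a boiled-down view of the loop in
question (not the literal xlog.c code; new_wal_is_available() is just a
stand-in for the real check against walrcv->receivedUpto):

	/*
	 * The 5 s figure only bounds how long we go between trigger-file
	 * checks; new WAL wakes us up immediately via the latch.
	 */
	while (!new_wal_is_available())
	{
		/* React to a requested failover even while no WAL is arriving */
		if (CheckForStandbyTrigger())
			break;				/* the "goto triggered" path in xlog.c */

		/* Returns as soon as walreceiver sets the latch, or after 5 seconds */
		WaitForWalArrival(5000000L);
	}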
BTW, I noticed that I missed incrementing the latch count in
win32_latch.c, and that the owning/disowning of the latch wasn't done
correctly: you get an error if you restart the master and reconnect. I'll
post an updated patch shortly.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
On 13/09/10 14:54, Heikki Linnakangas wrote:
BTW, I noticed that I missed incrementing the latch count in
win32_latch.c, and that the owning/disowning of the latch wasn't done
correctly: you get an error if you restart the master and reconnect. I'll
post an updated patch shortly.
Here's an updated patch with those bugs fixed.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Attachments:
walreceiver-latch-2.patch (text/x-diff)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ddf7d79..40e1718 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -46,6 +46,7 @@
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
@@ -9139,6 +9140,13 @@ startupproc_quickdie(SIGNAL_ARGS)
}
+/* SIGUSR1: let latch facility handle the signal */
+static void
+StartupProcSigUsr1Handler(SIGNAL_ARGS)
+{
+ latch_sigusr1_handler();
+}
+
/* SIGHUP: set flag to re-read config file at next convenient time */
static void
StartupProcSigHupHandler(SIGNAL_ARGS)
@@ -9213,7 +9221,7 @@ StartupProcessMain(void)
else
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
- pqsignal(SIGUSR1, SIG_IGN);
+ pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
pqsignal(SIGUSR2, SIG_IGN);
/*
@@ -9397,16 +9405,13 @@ retry:
}
/*
- * Data not here yet, so check for trigger then sleep.
+ * Data not here yet, so check for trigger then sleep for
+ * five seconds like in the WAL file polling case below.
*/
if (CheckForStandbyTrigger())
goto triggered;
- /*
- * When streaming is active, we want to react quickly when
- * the next WAL record arrives, so sleep only a bit.
- */
- pg_usleep(100000L); /* 100ms */
+ WaitForWalArrival(5000000L);
}
else
{
diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c
index da06202..e39bf1c 100644
--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@@ -230,6 +230,8 @@ NumSharedLatches(void)
/* Each walsender needs one latch */
numLatches += max_wal_senders;
+ /* One latch for startup process - walreceiver communication */
+ numLatches += 1;
return numLatches;
}
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b868707..996c54f 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -182,6 +182,12 @@ WalReceiverMain(void)
Assert(walrcv->pid == 0);
switch (walrcv->walRcvState)
{
+ case WALRCV_NOT_INITIALIZED:
+ /* can't happen */
+ SpinLockRelease(&walrcv->mutex);
+ elog(ERROR, "walreceiver state not initialized");
+ break;
+
case WALRCV_STOPPING:
/* If we've already been requested to stop, don't start up. */
walrcv->walRcvState = WALRCV_STOPPED;
@@ -529,6 +535,9 @@ XLogWalRcvFlush(void)
walrcv->receivedUpto = LogstreamResult.Flush;
SpinLockRelease(&walrcv->mutex);
+ /* Signal the startup process that new WAL has arrived */
+ SetLatch(&walrcv->receivedLatch);
+
/* Report XLOG streaming progress in PS display */
if (update_process_title)
{
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index b206885..571fc02 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -62,8 +62,9 @@ WalRcvShmemInit(void)
{
/* First time through, so initialize */
MemSet(WalRcv, 0, WalRcvShmemSize());
- WalRcv->walRcvState = WALRCV_STOPPED;
+ WalRcv->walRcvState = WALRCV_NOT_INITIALIZED;
SpinLockInit(&WalRcv->mutex);
+ InitSharedLatch(&WalRcv->receivedLatch);
}
}
@@ -104,7 +105,7 @@ WalRcvInProgress(void)
}
}
- if (state != WALRCV_STOPPED)
+ if (state != WALRCV_STOPPED && state != WALRCV_NOT_INITIALIZED)
return true;
else
return false;
@@ -128,6 +129,7 @@ ShutdownWalRcv(void)
SpinLockAcquire(&walrcv->mutex);
switch (walrcv->walRcvState)
{
+ case WALRCV_NOT_INITIALIZED:
case WALRCV_STOPPED:
break;
case WALRCV_STARTING:
@@ -177,6 +179,7 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
pg_time_t now = (pg_time_t) time(NULL);
+ bool firsttime;
/*
* We always start at the beginning of the segment. That prevents a broken
@@ -187,8 +190,20 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
if (recptr.xrecoff % XLogSegSize != 0)
recptr.xrecoff -= recptr.xrecoff % XLogSegSize;
+ /*
+ * Update shared memory status with information needed by walreceiver
+ */
SpinLockAcquire(&walrcv->mutex);
+ /* Check if this is the first time we start walreceiver */
+ if (walrcv->walRcvState == WALRCV_NOT_INITIALIZED)
+ {
+ firsttime = true;
+ walrcv->walRcvState = WALRCV_STOPPED;
+ }
+ else
+ firsttime = false;
+
/* It better be stopped before we try to restart it */
Assert(walrcv->walRcvState == WALRCV_STOPPED);
@@ -204,6 +219,14 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
SpinLockRelease(&walrcv->mutex);
+ /*
+ * Take ownership of the latch the first time we start walreceiver, so
+ * that we can wait for WAL arrival
+ */
+ if (firsttime)
+ OwnLatch(&walrcv->receivedLatch);
+
+ /* Request postmaster to start the walreceiver process */
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
}
@@ -229,3 +252,20 @@ GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart)
return recptr;
}
+
+/*
+ * Wait for more WAL to arrive, or timeout (in microseconds) to be reached
+ */
+void
+WaitForWalArrival(int timeout)
+{
+ /* Wait for more WAL to arrive */
+ if (WaitLatch(&WalRcv->receivedLatch, timeout))
+ {
+ /*
+	 * Reset the latch so that the next call to WaitForWalArrival will sleep
+ * again.
+ */
+ ResetLatch(&WalRcv->receivedLatch);
+ }
+}
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 2ea881e..081ea90 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -13,6 +13,7 @@
#define _WALRECEIVER_H
#include "access/xlogdefs.h"
+#include "storage/latch.h"
#include "storage/spin.h"
#include "pgtime.h"
@@ -30,6 +31,7 @@ extern bool am_walreceiver;
*/
typedef enum
{
+ WALRCV_NOT_INITIALIZED, /* never started */
WALRCV_STOPPED, /* stopped and mustn't start up again */
WALRCV_STARTING, /* launched, but the process hasn't
* initialized yet */
@@ -72,6 +74,13 @@ typedef struct
char conninfo[MAXCONNINFO];
slock_t mutex; /* locks shared variables shown above */
+
+ /*
+ * Walreceiver sets this latch every time new WAL has been received and
+ * fsync'd to disk, allowing startup process to wait for new WAL to
+ * arrive.
+ */
+ Latch receivedLatch;
} WalRcvData;
extern WalRcvData *WalRcv;
@@ -92,8 +101,8 @@ extern Size WalRcvShmemSize(void);
extern void WalRcvShmemInit(void);
extern void ShutdownWalRcv(void);
extern bool WalRcvInProgress(void);
-extern XLogRecPtr WaitNextXLogAvailable(XLogRecPtr recptr, bool *finished);
extern void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo);
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart);
+extern void WaitForWalArrival(int timeout);
#endif /* _WALRECEIVER_H */
On Mon, Sep 13, 2010 at 9:13 PM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
Here's an updated patch with those bugs fixed.
Great!
+ /*
+ * Walreceiver sets this latch every time new WAL has been received and
+ * fsync'd to disk, allowing startup process to wait for new WAL to
+ * arrive.
+ */
+ Latch receivedLatch;
I think this latch should be usable for more than just walreceiver -
startup process communication. For example, backend - startup process
communication, which could be used to request a failover via an SQL
function in the future. What about putting the latch in XLogCtl instead of
WalRcv, and calling OwnLatch at the beginning of the startup process
instead of in RequestXLogStreaming?
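For instance (completely hypothetical, just to illustrate the kind of thing
it would enable; the function name and the wakeup wrapper are made up), a
backend could expose something like:

#include "postgres.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(pg_request_recovery_wakeup);

/*
 * Hypothetical SQL-callable function that pokes the startup process.
 * Assumes xlog.c exports a small wrapper (made-up name here) that sets
 * the recovery wakeup latch, since XLogCtl itself is static to xlog.c.
 */
Datum
pg_request_recovery_wakeup(PG_FUNCTION_ARGS)
{
	RequestRecoveryWakeup();	/* hypothetical wrapper around SetLatch() */
	PG_RETURN_VOID();
}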
Regards,
--
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
On 14/09/10 05:02, Fujii Masao wrote:
+	/*
+	 * Walreceiver sets this latch every time new WAL has been received and
+	 * fsync'd to disk, allowing startup process to wait for new WAL to
+	 * arrive.
+	 */
+	Latch		receivedLatch;

I think this latch should be usable for more than just walreceiver -
startup process communication. For example, backend - startup process
communication, which could be used to request a failover via an SQL
function in the future. What about putting the latch in XLogCtl instead of
WalRcv, and calling OwnLatch at the beginning of the startup process
instead of in RequestXLogStreaming?
Yes, good point. I updated the patch along those lines, attached.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Attachments:
walreceiver-latch-3.patch (text/x-diff)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ddf7d79..b94aa24 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -46,6 +46,7 @@
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
@@ -393,6 +394,13 @@ typedef struct XLogCtlData
bool SharedRecoveryInProgress;
/*
+ * recoveryWakeupLatch is used to wake up the startup process to
+ * continue WAL replay, if it is waiting for WAL to arrive or failover
+ * trigger file to appear.
+ */
+ Latch recoveryWakeupLatch;
+
+ /*
* During recovery, we keep a copy of the latest checkpoint record here.
* Used by the background writer when it wants to create a restartpoint.
*
@@ -4840,6 +4848,7 @@ XLOGShmemInit(void)
XLogCtl->SharedRecoveryInProgress = true;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck);
+ InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
/*
* If we are not in bootstrap mode, pg_control should already exist. Read
@@ -5814,6 +5823,13 @@ StartupXLOG(void)
(errmsg("starting archive recovery")));
}
+ /*
+	 * Take ownership of the wakeup latch if we're going to sleep during
+ * recovery.
+ */
+ if (StandbyMode)
+ OwnLatch(&XLogCtl->recoveryWakeupLatch);
+
if (read_backup_label(&checkPointLoc))
{
/*
@@ -9139,6 +9155,13 @@ startupproc_quickdie(SIGNAL_ARGS)
}
+/* SIGUSR1: let latch facility handle the signal */
+static void
+StartupProcSigUsr1Handler(SIGNAL_ARGS)
+{
+ latch_sigusr1_handler();
+}
+
/* SIGHUP: set flag to re-read config file at next convenient time */
static void
StartupProcSigHupHandler(SIGNAL_ARGS)
@@ -9213,7 +9236,7 @@ StartupProcessMain(void)
else
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
- pqsignal(SIGUSR1, SIG_IGN);
+ pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
pqsignal(SIGUSR2, SIG_IGN);
/*
@@ -9397,16 +9420,17 @@ retry:
}
/*
- * Data not here yet, so check for trigger then sleep.
+ * Data not here yet, so check for trigger then sleep for
+ * five seconds like in the WAL file polling case below.
*/
if (CheckForStandbyTrigger())
goto triggered;
/*
- * When streaming is active, we want to react quickly when
- * the next WAL record arrives, so sleep only a bit.
+ * Wait for more WAL to arrive, or timeout to be reached
*/
- pg_usleep(100000L); /* 100ms */
+ WaitLatch(&XLogCtl->recoveryWakeupLatch, 5000000L);
+ ResetLatch(&XLogCtl->recoveryWakeupLatch);
}
else
{
@@ -9681,3 +9705,13 @@ CheckForStandbyTrigger(void)
}
return false;
}
+
+/*
+ * Wake up startup process to replay newly arrived WAL, or to notice that
+ * failover has been requested.
+ */
+void
+WakeupRecovery(void)
+{
+ SetLatch(&XLogCtl->recoveryWakeupLatch);
+}
diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c
index da06202..e39bf1c 100644
--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@@ -230,6 +230,8 @@ NumSharedLatches(void)
/* Each walsender needs one latch */
numLatches += max_wal_senders;
+ /* One latch for startup process - walreceiver communication */
+ numLatches += 1;
return numLatches;
}
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b868707..a20119b 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -529,6 +529,9 @@ XLogWalRcvFlush(void)
walrcv->receivedUpto = LogstreamResult.Flush;
SpinLockRelease(&walrcv->mutex);
+ /* Signal the startup process that new WAL has arrived */
+ WakeupRecovery();
+
/* Report XLOG streaming progress in PS display */
if (update_process_title)
{
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index b206885..308d54e 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -187,6 +187,9 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
if (recptr.xrecoff % XLogSegSize != 0)
recptr.xrecoff -= recptr.xrecoff % XLogSegSize;
+ /*
+ * Update shared memory status with information needed by walreceiver
+ */
SpinLockAcquire(&walrcv->mutex);
/* It better be stopped before we try to restart it */
@@ -204,6 +207,7 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
SpinLockRelease(&walrcv->mutex);
+ /* Request postmaster to start the walreceiver process */
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 7d667d5..bad36f3 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -303,5 +303,6 @@ extern TimeLineID GetRecoveryTargetTLI(void);
extern void HandleStartupProcInterrupts(void);
extern void StartupProcessMain(void);
+extern void WakeupRecovery(void);
#endif /* XLOG_H */
On Tue, Sep 14, 2010 at 5:51 PM, Heikki Linnakangas
<heikki.linnakangas@enterprisedb.com> wrote:
On 14/09/10 05:02, Fujii Masao wrote:
+	/*
+	 * Walreceiver sets this latch every time new WAL has been received and
+	 * fsync'd to disk, allowing startup process to wait for new WAL to
+	 * arrive.
+	 */
+	Latch		receivedLatch;

I think this latch should be usable for more than just walreceiver -
startup process communication. For example, backend - startup process
communication, which could be used to request a failover via an SQL
function in the future. What about putting the latch in XLogCtl instead of
WalRcv, and calling OwnLatch at the beginning of the startup process
instead of in RequestXLogStreaming?

Yes, good point. I updated the patch along those lines, attached.
Looks good.
+ /*
+ * Take ownership of the wakeup latch if we're going to sleep during
+ * recovery.
+ */
+ if (StandbyMode)
+ OwnLatch(&XLogCtl->recoveryWakeupLatch);
Since automatic restart after backend crash always performs a normal crash
recovery, the startup process will never call OwnLatch more than once. So
there might be no harm even if the startup process doesn't disown the shared
latch. But... what about calling DisownLatch at the end of recovery just in
case?
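Something along these lines near the end of StartupXLOG() is what I have in
mind (just an illustration; the exact spot is up to you):

	/* Illustration only: give up the latch once recovery is over */
	if (StandbyMode)
	{
		/*
		 * We owned recoveryWakeupLatch for the whole of recovery; disown
		 * it so a possible future owner doesn't trip the
		 * one-owner-at-a-time rule.
		 */
		DisownLatch(&XLogCtl->recoveryWakeupLatch);
	}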
Regards,
--
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center