allow online change primary_conninfo

Started by Sergei Kornilovabout 7 years ago77 messages
1 attachment(s)

Hello all
Now we integrate recovery.conf into GUC system. So i would like to start discussion to make primary_conninfo and primary_slot_name PGC_SIGHUP.
My work-in-progress patch attached.

I think we have no futher reason to have a copy of conninfo and slot name in WalRcvData, so i propose remove these fields from pg_stat_get_wal_receiver() (and pg_stat_wal_receiver view). This data can be accesible now via regular GUC commands.

Thank you for advance!

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v001.patchtext/x-diff; name=reload_walreceiver_conninfo_v001.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index db1a2d4..faa8e17 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3797,7 +3797,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
           This setting has no effect if the server is not in standby mode.
          </para>
         </listitem>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 7aada14..69ccc15 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2039,11 +2039,6 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
      <entry>Time of last write-ahead log location reported to origin WAL sender</entry>
     </row>
     <row>
-     <entry><structfield>slot_name</structfield></entry>
-     <entry><type>text</type></entry>
-     <entry>Replication slot name used by this WAL receiver</entry>
-    </row>
-    <row>
      <entry><structfield>sender_host</structfield></entry>
      <entry><type>text</type></entry>
      <entry>
@@ -2062,14 +2057,6 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       this WAL receiver is connected to.
      </entry>
     </row>
-    <row>
-     <entry><structfield>conninfo</structfield></entry>
-     <entry><type>text</type></entry>
-     <entry>
-      Connection string used by this WAL receiver,
-      with security-sensitive fields obfuscated.
-     </entry>
-    </row>
    </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c80b14e..9d66455 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11848,8 +11848,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 									 tli, curFileTLI);
 						}
 						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+						RequestXLogStreaming(tli, ptr);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 715995d..d5159ce 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -751,10 +751,8 @@ CREATE VIEW pg_stat_wal_receiver AS
             s.last_msg_receipt_time,
             s.latest_end_lsn,
             s.latest_end_time,
-            s.slot_name,
             s.sender_host,
-            s.sender_port,
-            s.conninfo
+            s.sender_port
     FROM pg_stat_get_wal_receiver() s
     WHERE s.pid IS NOT NULL;
 
diff --git a/src/backend/replication/README b/src/backend/replication/README
index 0cbb990..b70c5b9 100644
--- a/src/backend/replication/README
+++ b/src/backend/replication/README
@@ -49,8 +49,7 @@ to fetch more WAL (if streaming replication is configured).
 
 Walreceiver is a postmaster subprocess, so the startup process can't fork it
 directly. Instead, it sends a signal to postmaster, asking postmaster to launch
-it. Before that, however, startup process fills in WalRcvData->conninfo
-and WalRcvData->slotname, and initializes the starting point in
+it. Before that, however, startup process initializes the starting point in
 WalRcvData->receiveStart.
 
 As walreceiver receives WAL from the master server, and writes and flushes
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 9643c2e..49cc059 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -188,9 +188,6 @@ DisableWalRcvImmediateExit(void)
 void
 WalReceiverMain(void)
 {
-	char		conninfo[MAXCONNINFO];
-	char	   *tmp_conninfo;
-	char		slotname[NAMEDATALEN];
 	XLogRecPtr	startpoint;
 	TimeLineID	startpointTLI;
 	TimeLineID	primaryTLI;
@@ -249,9 +246,6 @@ WalReceiverMain(void)
 	walrcv->walRcvState = WALRCV_STREAMING;
 
 	/* Fetch information required to start streaming */
-	walrcv->ready_to_display = false;
-	strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
-	strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
 	startpoint = walrcv->receiveStart;
 	startpointTLI = walrcv->receiveStartTLI;
 
@@ -293,35 +287,26 @@ WalReceiverMain(void)
 
 	/* Establish the connection to the primary for XLOG streaming */
 	EnableWalRcvImmediateExit();
-	wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
+	wrconn = walrcv_connect(PrimaryConnInfo, false, "walreceiver", &err);
 	if (!wrconn)
 		ereport(ERROR,
 				(errmsg("could not connect to the primary server: %s", err)));
 	DisableWalRcvImmediateExit();
 
 	/*
-	 * Save user-visible connection string.  This clobbers the original
-	 * conninfo, for security. Also save host and port of the sender server
+	 * Save host and port of the sender server
 	 * this walreceiver is connected to.
 	 */
-	tmp_conninfo = walrcv_get_conninfo(wrconn);
 	walrcv_get_senderinfo(wrconn, &sender_host, &sender_port);
 	SpinLockAcquire(&walrcv->mutex);
-	memset(walrcv->conninfo, 0, MAXCONNINFO);
-	if (tmp_conninfo)
-		strlcpy((char *) walrcv->conninfo, tmp_conninfo, MAXCONNINFO);
 
 	memset(walrcv->sender_host, 0, NI_MAXHOST);
 	if (sender_host)
 		strlcpy((char *) walrcv->sender_host, sender_host, NI_MAXHOST);
 
 	walrcv->sender_port = sender_port;
-	walrcv->ready_to_display = true;
 	SpinLockRelease(&walrcv->mutex);
 
-	if (tmp_conninfo)
-		pfree(tmp_conninfo);
-
 	if (sender_host)
 		pfree(sender_host);
 
@@ -387,7 +372,7 @@ WalReceiverMain(void)
 		 */
 		options.logical = false;
 		options.startpoint = startpoint;
-		options.slotname = slotname[0] != '\0' ? slotname : NULL;
+		options.slotname = PrimarySlotName[0] != '\0' ? PrimarySlotName : NULL;
 		options.proto.physical.startpointTLI = startpointTLI;
 		ThisTimeLineID = startpointTLI;
 		if (walrcv_startstreaming(wrconn, &options))
@@ -435,9 +420,33 @@ WalReceiverMain(void)
 
 				if (got_SIGHUP)
 				{
+					char	*conninfo = pstrdup(PrimaryConnInfo);
+					char	*slotname = pstrdup(PrimarySlotName);
+
 					got_SIGHUP = false;
 					ProcessConfigFile(PGC_SIGHUP);
 					XLogWalRcvSendHSFeedback(true);
+
+					/*
+					 * If primary_conninfo has been changed while walreceiver is running,
+					 * shut down walreceiver so that a new walreceiver is started and
+					 * initiates replication with the new connection information.
+					 */
+					if (strcmp(conninfo, PrimaryConnInfo) != 0)
+						ereport(FATAL,
+								(errcode(ERRCODE_ADMIN_SHUTDOWN),
+								 errmsg("closing replication connection because primary_conninfo was changed")));
+
+					/*
+					 * And the same for primary_slot_name.
+					 */
+					if (strcmp(slotname, PrimarySlotName) != 0)
+						ereport(FATAL,
+								(errcode(ERRCODE_ADMIN_SHUTDOWN),
+								 errmsg("closing replication connection because primary_slot_name was changed")));
+
+					pfree(conninfo);
+					pfree(slotname);
 				}
 
 				/* See if we can read data immediately */
@@ -672,7 +681,6 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
@@ -776,7 +784,6 @@ WalRcvDie(int code, Datum arg)
 	Assert(walrcv->pid == MyProcPid);
 	walrcv->walRcvState = WALRCV_STOPPED;
 	walrcv->pid = 0;
-	walrcv->ready_to_display = false;
 	walrcv->latch = NULL;
 	SpinLockRelease(&walrcv->mutex);
 
@@ -1374,7 +1381,6 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	Datum	   *values;
 	bool	   *nulls;
 	int			pid;
-	bool		ready_to_display;
 	WalRcvState state;
 	XLogRecPtr	receive_start_lsn;
 	TimeLineID	receive_start_tli;
@@ -1386,13 +1392,10 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	TimestampTz latest_end_time;
 	char		sender_host[NI_MAXHOST];
 	int			sender_port = 0;
-	char		slotname[NAMEDATALEN];
-	char		conninfo[MAXCONNINFO];
 
 	/* Take a lock to ensure value consistency */
 	SpinLockAcquire(&WalRcv->mutex);
 	pid = (int) WalRcv->pid;
-	ready_to_display = WalRcv->ready_to_display;
 	state = WalRcv->walRcvState;
 	receive_start_lsn = WalRcv->receiveStart;
 	receive_start_tli = WalRcv->receiveStartTLI;
@@ -1402,17 +1405,15 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	last_receipt_time = WalRcv->lastMsgReceiptTime;
 	latest_end_lsn = WalRcv->latestWalEnd;
 	latest_end_time = WalRcv->latestWalEndTime;
-	strlcpy(slotname, (char *) WalRcv->slotname, sizeof(slotname));
 	strlcpy(sender_host, (char *) WalRcv->sender_host, sizeof(sender_host));
 	sender_port = WalRcv->sender_port;
-	strlcpy(conninfo, (char *) WalRcv->conninfo, sizeof(conninfo));
 	SpinLockRelease(&WalRcv->mutex);
 
 	/*
 	 * No WAL receiver (or not ready yet), just return a tuple with NULL
 	 * values
 	 */
-	if (pid == 0 || !ready_to_display)
+	if (pid == 0)
 		PG_RETURN_NULL();
 
 	/* determine result type */
@@ -1464,22 +1465,14 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 			nulls[9] = true;
 		else
 			values[9] = TimestampTzGetDatum(latest_end_time);
-		if (*slotname == '\0')
-			nulls[10] = true;
-		else
-			values[10] = CStringGetTextDatum(slotname);
 		if (*sender_host == '\0')
-			nulls[11] = true;
+			nulls[10] = true;
 		else
-			values[11] = CStringGetTextDatum(sender_host);
+			values[10] = CStringGetTextDatum(sender_host);
 		if (sender_port == 0)
-			nulls[12] = true;
-		else
-			values[12] = Int32GetDatum(sender_port);
-		if (*conninfo == '\0')
-			nulls[13] = true;
+			nulls[11] = true;
 		else
-			values[13] = CStringGetTextDatum(conninfo);
+			values[11] = Int32GetDatum(sender_port);
 	}
 
 	/* Returns the record as Datum */
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 67b1a07..be37241 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,10 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * recptr indicates the position where streaming should begin
  */
 void
-RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -243,16 +240,6 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	Assert(walrcv->walRcvState == WALRCV_STOPPED ||
 		   walrcv->walRcvState == WALRCV_WAITING);
 
-	if (conninfo != NULL)
-		strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
-	else
-		walrcv->conninfo[0] = '\0';
-
-	if (slotname != NULL)
-		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
-	else
-		walrcv->slotname[0] = '\0';
-
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
 		launch = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 6497393..301a4b5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3435,7 +3435,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3446,7 +3446,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 034a41e..7e7d062 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5030,9 +5030,9 @@
 { oid => '3317', descr => 'statistics: information about WAL receiver',
   proname => 'pg_stat_get_wal_receiver', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{int4,text,pg_lsn,int4,pg_lsn,int4,timestamptz,timestamptz,pg_lsn,timestamptz,text,text,int4,text}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,sender_host,sender_port,conninfo}',
+  proallargtypes => '{int4,text,pg_lsn,int4,pg_lsn,int4,timestamptz,timestamptz,pg_lsn,timestamptz,text,int4}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,sender_host,sender_port}',
   prosrc => 'pg_stat_get_wal_receiver' },
 { oid => '6118', descr => 'statistics: information about subscription',
   proname => 'pg_stat_get_subscription', proisstrict => 'f', provolatile => 's',
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 5913b58..e331648 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -104,12 +104,6 @@ typedef struct
 	TimestampTz latestWalEndTime;
 
 	/*
-	 * connection string; initially set to connect to the primary, and later
-	 * clobbered to hide security-sensitive fields.
-	 */
-	char		conninfo[MAXCONNINFO];
-
-	/*
 	 * Host name (this can be a host name, an IP address, or a directory path)
 	 * and port number of the active replication connection.
 	 */
@@ -117,15 +111,6 @@ typedef struct
 	int			sender_port;
 
 	/*
-	 * replication slot name; is also used for walreceiver to connect with the
-	 * primary
-	 */
-	char		slotname[NAMEDATALEN];
-
-	/* set true once conninfo is ready to display (obfuscated pwds etc) */
-	bool		ready_to_display;
-
-	/*
 	 * Latch used by startup process to wake up walreceiver after telling it
 	 * where to start streaming (after setting receiveStart and
 	 * receiveStartTLI), and also to tell it to send apply feedback to the
@@ -306,8 +291,7 @@ extern void WalRcvShmemInit(void);
 extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
-extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-					 const char *conninfo, const char *slotname);
+extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb4555..5b9bc77 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,7 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+$node_standby_2->reload; # should have effect without restart
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +282,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 735dd37..e31c894 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1971,11 +1971,9 @@ pg_stat_wal_receiver| SELECT s.pid,
     s.last_msg_receipt_time,
     s.latest_end_lsn,
     s.latest_end_time,
-    s.slot_name,
     s.sender_host,
-    s.sender_port,
-    s.conninfo
-   FROM pg_stat_get_wal_receiver() s(pid, status, receive_start_lsn, receive_start_tli, received_lsn, received_tli, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time, slot_name, sender_host, sender_port, conninfo)
+    s.sender_port
+   FROM pg_stat_get_wal_receiver() s(pid, status, receive_start_lsn, receive_start_tli, received_lsn, received_tli, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time, sender_host, sender_port)
   WHERE (s.pid IS NOT NULL);
 pg_stat_xact_all_tables| SELECT c.oid AS relid,
     n.nspname AS schemaname,
#2Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#1)
Re: allow online change primary_conninfo

Hi,

On 2018-11-26 00:25:43 +0300, Sergei Kornilov wrote:

Now we integrate recovery.conf into GUC system. So i would like to start discussion to make primary_conninfo and primary_slot_name PGC_SIGHUP.
My work-in-progress patch attached.

Cool!

I think we have no futher reason to have a copy of conninfo and slot name in WalRcvData, so i propose remove these fields from pg_stat_get_wal_receiver() (and pg_stat_wal_receiver view). This data can be accesible now via regular GUC commands.

Is that wise? It seems possible that wal receivers run for a while with
the old connection information, and without this information that seems
hard to debug.

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index db1a2d4..faa8e17 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3797,7 +3797,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
<varname>primary_conninfo</varname> string.
</para>
<para>
-          This parameter can only be set at server start.
This setting has no effect if the server is not in standby mode.
</para>
</listitem>

Should probably note something like

"This parameter can only be set in the <filename>postgresql.conf</filename>
file or on the server command line."

@@ -435,9 +420,33 @@ WalReceiverMain(void)

if (got_SIGHUP)
{
+					char	*conninfo = pstrdup(PrimaryConnInfo);
+					char	*slotname = pstrdup(PrimarySlotName);
+
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
XLogWalRcvSendHSFeedback(true);
+
+					/*
+					 * If primary_conninfo has been changed while walreceiver is running,
+					 * shut down walreceiver so that a new walreceiver is started and
+					 * initiates replication with the new connection information.
+					 */
+					if (strcmp(conninfo, PrimaryConnInfo) != 0)
+						ereport(FATAL,
+								(errcode(ERRCODE_ADMIN_SHUTDOWN),
+								 errmsg("closing replication connection because primary_conninfo was changed")));
+
+					/*
+					 * And the same for primary_slot_name.
+					 */
+					if (strcmp(slotname, PrimarySlotName) != 0)
+						ereport(FATAL,
+								(errcode(ERRCODE_ADMIN_SHUTDOWN),
+								 errmsg("closing replication connection because primary_slot_name was changed")));
+
+					pfree(conninfo);
+					pfree(slotname);

I'd strongly advocate moving this to a separate function.

Greetings,

Andres Freund

#3Michael Paquier
michael@paquier.xyz
In reply to: Andres Freund (#2)
Re: allow online change primary_conninfo

On Sun, Nov 25, 2018 at 01:43:13PM -0800, Andres Freund wrote:

On 2018-11-26 00:25:43 +0300, Sergei Kornilov wrote:

I think we have no futher reason to have a copy of conninfo and slot
name in WalRcvData, so i propose remove these fields from
pg_stat_get_wal_receiver() (and pg_stat_wal_receiver view). This data
can be accesible now via regular GUC commands.

Is that wise? It seems possible that wal receivers run for a while with
the old connection information, and without this information that seems
hard to debug.

No, that's unwise. One of the reasons why conninfo is around is that we
know to which server a receiver is connected when specifying multiple
host and/or port targets in the configuration. Please see 9a895462 for
the details.

Removing the slot does not look like an improvement to me either,
following Andres' argument.
--
Michael

In reply to: Andres Freund (#2)
1 attachment(s)
Re: allow online change primary_conninfo

Hi

 I think we have no futher reason to have a copy of conninfo and slot name in WalRcvData, so i propose remove these fields from pg_stat_get_wal_receiver() (and pg_stat_wal_receiver view). This data can be accesible now via regular GUC commands.

Is that wise? It seems possible that wal receivers run for a while with
the old connection information, and without this information that seems
hard to debug.

Hmm... I considered SIGHUP processing was in fast loop and therefore shutdown should be fast. But i recheck code and found a possible long loop without processing SIGHUP (in case we receive new data faster than writes to disk). Ok, i will revert back.
How about write to WalRcvData only clobbered conninfo?

Should probably note something like

"This parameter can only be set in the <filename>postgresql.conf</filename>
file or on the server command line."

Seems other PGC_SIGHUP settings have such note, fixed, thank you.

I'd strongly advocate moving this to a separate function.

Done

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v002.patchtext/x-diff; name=reload_walreceiver_conninfo_v002.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index db1a2d4..c3f19d8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3797,7 +3797,8 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
         </listitem>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c80b14e..9d66455 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11848,8 +11848,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 									 tli, curFileTLI);
 						}
 						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+						RequestXLogStreaming(tli, ptr);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 9643c2e..14cf279 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -143,6 +143,7 @@ static void XLogWalRcvFlush(bool dying);
 static void XLogWalRcvSendReply(bool force, bool requestReply);
 static void XLogWalRcvSendHSFeedback(bool immed);
 static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
+static void ProcessWalRcvSigHup(void);
 
 /* Signal handlers */
 static void WalRcvSigHupHandler(SIGNAL_ARGS);
@@ -188,9 +189,7 @@ DisableWalRcvImmediateExit(void)
 void
 WalReceiverMain(void)
 {
-	char		conninfo[MAXCONNINFO];
 	char	   *tmp_conninfo;
-	char		slotname[NAMEDATALEN];
 	XLogRecPtr	startpoint;
 	TimeLineID	startpointTLI;
 	TimeLineID	primaryTLI;
@@ -250,8 +249,6 @@ WalReceiverMain(void)
 
 	/* Fetch information required to start streaming */
 	walrcv->ready_to_display = false;
-	strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
-	strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
 	startpoint = walrcv->receiveStart;
 	startpointTLI = walrcv->receiveStartTLI;
 
@@ -293,7 +290,7 @@ WalReceiverMain(void)
 
 	/* Establish the connection to the primary for XLOG streaming */
 	EnableWalRcvImmediateExit();
-	wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
+	wrconn = walrcv_connect(PrimaryConnInfo, false, "walreceiver", &err);
 	if (!wrconn)
 		ereport(ERROR,
 				(errmsg("could not connect to the primary server: %s", err)));
@@ -387,7 +384,7 @@ WalReceiverMain(void)
 		 */
 		options.logical = false;
 		options.startpoint = startpoint;
-		options.slotname = slotname[0] != '\0' ? slotname : NULL;
+		options.slotname = PrimarySlotName[0] != '\0' ? PrimarySlotName : NULL;
 		options.proto.physical.startpointTLI = startpointTLI;
 		ThisTimeLineID = startpointTLI;
 		if (walrcv_startstreaming(wrconn, &options))
@@ -436,8 +433,7 @@ WalReceiverMain(void)
 				if (got_SIGHUP)
 				{
 					got_SIGHUP = false;
-					ProcessConfigFile(PGC_SIGHUP);
-					XLogWalRcvSendHSFeedback(true);
+					ProcessWalRcvSigHup();
 				}
 
 				/* See if we can read data immediately */
@@ -1317,6 +1313,41 @@ ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime)
 }
 
 /*
+ * Actual processing SIGHUP signal
+ */
+static void
+ProcessWalRcvSigHup(void)
+{
+	char	*conninfo = pstrdup(PrimaryConnInfo);
+	char	*slotname = pstrdup(PrimarySlotName);
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * If primary_conninfo has been changed while walreceiver is running,
+	 * shut down walreceiver so that a new walreceiver is started and
+	 * initiates replication with the new connection information.
+	 */
+	if (strcmp(conninfo, PrimaryConnInfo) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("closing replication connection because primary_conninfo was changed")));
+
+	/*
+	 * And the same for primary_slot_name.
+	 */
+	if (strcmp(slotname, PrimarySlotName) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("closing replication connection because primary_slot_name was changed")));
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	XLogWalRcvSendHSFeedback(true);
+}
+
+/*
  * Wake up the walreceiver main loop.
  *
  * This is called by the startup process whenever interesting xlog records
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 67b1a07..be37241 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,10 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * recptr indicates the position where streaming should begin
  */
 void
-RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -243,16 +240,6 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	Assert(walrcv->walRcvState == WALRCV_STOPPED ||
 		   walrcv->walRcvState == WALRCV_WAITING);
 
-	if (conninfo != NULL)
-		strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
-	else
-		walrcv->conninfo[0] = '\0';
-
-	if (slotname != NULL)
-		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
-	else
-		walrcv->slotname[0] = '\0';
-
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
 		launch = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 6497393..301a4b5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3435,7 +3435,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3446,7 +3446,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 5913b58..510c7f6 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -104,7 +104,7 @@ typedef struct
 	TimestampTz latestWalEndTime;
 
 	/*
-	 * connection string; initially set to connect to the primary, and later
+	 * connection string used by runned walreceiver;
 	 * clobbered to hide security-sensitive fields.
 	 */
 	char		conninfo[MAXCONNINFO];
@@ -117,8 +117,7 @@ typedef struct
 	int			sender_port;
 
 	/*
-	 * replication slot name; is also used for walreceiver to connect with the
-	 * primary
+	 * replication slot name used by runned walreceiver
 	 */
 	char		slotname[NAMEDATALEN];
 
@@ -306,8 +305,7 @@ extern void WalRcvShmemInit(void);
 extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
-extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-					 const char *conninfo, const char *slotname);
+extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb4555..5b9bc77 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,7 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+$node_standby_2->reload; # should have effect without restart
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +282,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#5Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#4)
Re: allow online change primary_conninfo

Hi,

On 2018-11-26 12:30:06 +0300, Sergei Kornilov wrote:

Hmm... I considered SIGHUP processing was in fast loop and therefore shutdown should be fast. But i recheck code and found a possible long loop without processing SIGHUP (in case we receive new data faster than writes to disk). Ok, i will revert back.
How about write to WalRcvData only clobbered conninfo?

I'm not sure I understand what you mean? The way I'd solve this is that
that only walreceiver, at startup, writes out its conninfo/slot_name,
sourcing the values from the GUCs. That way there's no issue with values
being overwritten early.

- Andres

In reply to: Andres Freund (#5)
Re: allow online change primary_conninfo

Hi

 Hmm... I considered SIGHUP processing was in fast loop and therefore shutdown should be fast. But i recheck code and found a possible long loop without processing SIGHUP (in case we receive new data faster than writes to disk). Ok, i will revert back.
 How about write to WalRcvData only clobbered conninfo?

I'm not sure I understand what you mean?

I am about my initial proposal with remove conninfo wrom WalRcvData - walreceiver may run some time with old conninfo and

without this information that seems hard to debug.

Earlier i thought walreceiver will shutdown fast on SIGHUP.

The way I'd solve this is that
that only walreceiver, at startup, writes out its conninfo/slot_name,
sourcing the values from the GUCs. That way there's no issue with values
being overwritten early.

In second patch i follow exactly this logic.

regards, Sergei

#7Michael Paquier
michael@paquier.xyz
In reply to: Andres Freund (#5)
Re: allow online change primary_conninfo

On Mon, Nov 26, 2018 at 09:46:36AM -0800, Andres Freund wrote:

I'm not sure I understand what you mean? The way I'd solve this is that
that only walreceiver, at startup, writes out its conninfo/slot_name,
sourcing the values from the GUCs. That way there's no issue with values
being overwritten early.

WalRcv->conninfo, as stored in the WAL receiver, clobbers some of its
fields and includes a set of default parameters with its values after
establishing the connection so as no sensible data shows up in
pg_stat_wal_receiver so you cannot simply compare what is stored in the
WAL receiver with the GUCs to do the decision-making. For the password,
you'd want to do again a connection anyway even if the cloberred string
is the same.

What's proposed in the patch to issue an ERROR if the parameter has
changed does not look that bad to me as it depends on the process
context, but I think that this patch should document clearly that if
primary_conninfo or primary_slot_name are changed then the WAL receiver
is stopped immediately.

    /*
-    * replication slot name; is also used for walreceiver to connect with the
-    * primary
+    * replication slot name used by runned walreceiver
     */
Not sure that there is much point in updating those comments, because
it still has the same meaning in the new context.
-RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-                    const char *slotname)
+RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr)
That's perhaps a matter of taste, but keeping the current API as-is
looks cleaner to me, particularly because those fields get copied into
WalRcv, so I'd rather not change what is done in
WaitForWALToBecomeAvailable and keep the patch at its simplest shape.
+$node_standby_2->reload; # should have effect without restart
This does not wait for the change to be effective, so I think that you
introduce a race condition for slow machines with this test, making it
unstable.
--
Michael
In reply to: Michael Paquier (#7)
Re: allow online change primary_conninfo

Hello

thank you for review!

but I think that this patch should document clearly that if
primary_conninfo or primary_slot_name are changed then the WAL receiver
is stopped immediately.

Good idea, will change.

    /*
- * replication slot name; is also used for walreceiver to connect with the
- * primary
+ * replication slot name used by runned walreceiver
     */
Not sure that there is much point in updating those comments, because
it still has the same meaning in the new context.

"is also used" seems outdated for me. With proposed patch this field means only currently active slot, not "also".
Well, depends on RequestXLogStreaming discussion

-RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
- const char *slotname)
+RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr)
That's perhaps a matter of taste, but keeping the current API as-is
looks cleaner to me, particularly because those fields get copied into
WalRcv, so I'd rather not change what is done in
WaitForWALToBecomeAvailable and keep the patch at its simplest shape.

Unfortunally here is race condition if i leave RequestXLogStreaming infrastructure as-is and walreceiver will use this struct for startup.
This case:
- startup process calls RequestXLogStreaming and fill WalRcv conninfo/slotname
- reload primary_conninnfo by SIGHUP
- walreceiver start with new GUC but trying conninfo/slotname from WalRcv struct. If walreceiver is able to connect - it will not restart till another error or primary_conninfo/slotname will be changed again (SIGHUP without change is not enough).

I already had such failures while testing this patch.

If walreceiver will use primary_conninfo GUC at startup - i see no reason to leave *conninfo in RequestXLogStreaming. These parameters will be misleading, we request them, but not using for anything.

+$node_standby_2->reload; # should have effect without restart
This does not wait for the change to be effective, so I think that you
introduce a race condition for slow machines with this test, making it
unstable.

No, here is no race condition because just after this line we wait this replication slot in upstream pg_catalog.pg_replication_slots (get_slot_xmins routine).
Before reload we have no replication slot and should reconnect here with replication slot. I can add some comment here.

regards, Sergei

In reply to: Michael Paquier (#7)
Re: allow online change primary_conninfo

Hello

After some thinking i can rewrite this patch in another way.

This is better or worse?

regards, Sergei

In reply to: Sergei Kornilov (#9)
1 attachment(s)
Re: allow online change primary_conninfo

oops, forgot attach patch

11.12.2018, 13:14, "Sergei Kornilov" <sk@zsrv.org>:

Show quoted text

Hello

After some thinking i can rewrite this patch in another way.

This is better or worse?

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v003.patchtext/x-diff; name=reload_walreceiver_conninfo_v003.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4a7121a..bce99ce 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3796,9 +3796,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          <application>walreceiver</application> will be restarted after
+          <varname>primary_conninfo</varname> was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3813,9 +3818,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          <application>walreceiver</application> will be restarted after
+          <varname>primary_slot_name</varname> was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 9643c2e..cdd9f40 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -113,6 +113,12 @@ static StringInfoData reply_message;
 static StringInfoData incoming_message;
 
 /*
+ * Copy of current WalReceiverConn connection info (not clobbered)
+ */
+static char current_conninfo[MAXCONNINFO];
+static char current_slotname[NAMEDATALEN];
+
+/*
  * About SIGTERM handling:
  *
  * We can't just exit(1) within SIGTERM signal handler, because the signal
@@ -143,6 +149,7 @@ static void XLogWalRcvFlush(bool dying);
 static void XLogWalRcvSendReply(bool force, bool requestReply);
 static void XLogWalRcvSendHSFeedback(bool immed);
 static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
+static void ProcessWalRcvSigHup(void);
 
 /* Signal handlers */
 static void WalRcvSigHupHandler(SIGNAL_ARGS);
@@ -188,9 +195,7 @@ DisableWalRcvImmediateExit(void)
 void
 WalReceiverMain(void)
 {
-	char		conninfo[MAXCONNINFO];
 	char	   *tmp_conninfo;
-	char		slotname[NAMEDATALEN];
 	XLogRecPtr	startpoint;
 	TimeLineID	startpointTLI;
 	TimeLineID	primaryTLI;
@@ -250,8 +255,8 @@ WalReceiverMain(void)
 
 	/* Fetch information required to start streaming */
 	walrcv->ready_to_display = false;
-	strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
-	strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
+	strlcpy(current_conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
+	strlcpy(current_slotname, (char *) walrcv->slotname, NAMEDATALEN);
 	startpoint = walrcv->receiveStart;
 	startpointTLI = walrcv->receiveStartTLI;
 
@@ -293,7 +298,7 @@ WalReceiverMain(void)
 
 	/* Establish the connection to the primary for XLOG streaming */
 	EnableWalRcvImmediateExit();
-	wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
+	wrconn = walrcv_connect(current_conninfo, false, "walreceiver", &err);
 	if (!wrconn)
 		ereport(ERROR,
 				(errmsg("could not connect to the primary server: %s", err)));
@@ -387,7 +392,7 @@ WalReceiverMain(void)
 		 */
 		options.logical = false;
 		options.startpoint = startpoint;
-		options.slotname = slotname[0] != '\0' ? slotname : NULL;
+		options.slotname = current_slotname[0] != '\0' ? current_slotname : NULL;
 		options.proto.physical.startpointTLI = startpointTLI;
 		ThisTimeLineID = startpointTLI;
 		if (walrcv_startstreaming(wrconn, &options))
@@ -436,8 +441,7 @@ WalReceiverMain(void)
 				if (got_SIGHUP)
 				{
 					got_SIGHUP = false;
-					ProcessConfigFile(PGC_SIGHUP);
-					XLogWalRcvSendHSFeedback(true);
+					ProcessWalRcvSigHup();
 				}
 
 				/* See if we can read data immediately */
@@ -1317,6 +1321,35 @@ ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime)
 }
 
 /*
+ * Actual processing SIGHUP signal
+ */
+static void
+ProcessWalRcvSigHup(void)
+{
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * If primary_conninfo has been changed while walreceiver is running,
+	 * shut down walreceiver so that a new walreceiver is started and
+	 * initiates replication with the new connection information.
+	 */
+	if (strcmp(current_conninfo, PrimaryConnInfo) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("closing replication connection because primary_conninfo was changed")));
+
+	/*
+	 * And the same for primary_slot_name.
+	 */
+	if (strcmp(current_slotname, PrimarySlotName) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("closing replication connection because primary_slot_name was changed")));
+
+	XLogWalRcvSendHSFeedback(true);
+}
+
+/*
  * Wake up the walreceiver main loop.
  *
  * This is called by the startup process whenever interesting xlog records
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 6fe1939..09948ee 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3448,7 +3448,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3459,7 +3459,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb4555..07ac964 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +284,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#11Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#8)
Re: allow online change primary_conninfo

On Tue, Dec 11, 2018 at 11:59:08AM +0300, Sergei Kornilov wrote:

but I think that this patch should document clearly that if
primary_conninfo or primary_slot_name are changed then the WAL receiver
is stopped immediately.

Good idea, will change.

+         <para>
+          <application>walreceiver</application> will be restarted after
+          <varname>primary_conninfo</varname> was changed.
+         </para>
Hm.  It may be cleaner to use "WAL receiver" here for clarity.  Perhaps
that's a matter of state but that seems cleaner when referring to the
actual process.  The docs share both grammar, depending on the context.

If walreceiver will use primary_conninfo GUC at startup - i see no
reason to leave *conninfo in RequestXLogStreaming. These parameters
will be misleading, we request them, but not using for anything.

Oh, indeed. My apologies for being confused here, I can see now your
point. It seems to me that this is an extra cleanup caused by the
recovery parameters switched to be GUCs, and not something that we
should be changed as an effect to SIGHUP for primary_conninfo and
primary_slot_name. So let's do this cleanup first. For this purpose, I
am going to post a new thread with a proper patch, with in CC the folks
who moved the recovery parameters to be GUCs.

+$node_standby_2->reload; # should have effect without restart
This does not wait for the change to be effective, so I think that you
introduce a race condition for slow machines with this test, making it
unstable.

No, here is no race condition because just after this line we wait
this replication slot in upstream pg_catalog.pg_replication_slots
(get_slot_xmins routine).
Before reload we have no replication slot and should reconnect here
with replication slot. I can add some comment here.

Good point, I have forgotten the call to get_slot_xmins() below.
--
Michael

#12Kyotaro HORIGUCHI
horiguchi.kyotaro@lab.ntt.co.jp
In reply to: Sergei Kornilov (#10)
Re: allow online change primary_conninfo

At Tue, 11 Dec 2018 13:16:23 +0300, Sergei Kornilov <sk@zsrv.org> wrote in <9653601544523383@iva8-37fc2ad204cd.qloud-c.yandex.net>
sk> oops, forgot attach patch
sk>
sk> 11.12.2018, 13:14, "Sergei Kornilov" <sk@zsrv.org>:
sk> > Hello
sk> >
sk> > After some thinking i can rewrite this patch in another way.
sk> >
sk> > This is better or worse?

As the whole the new version looks better for me.

===
Do we no longer need static version of conninfo/slotname in
walreceiver.c? We can detect a change of the variables without
them (as you did in the previous version.).

===
I don't think it is acceptable that raw conninfo is exposed into
log file.

LOG: parameter "primary_conninfo" changed to "host=/tmp port=5432 password=hoge"

===

errmsg("closing replication connection because primary_conninfo was changed")));

Maybe it is better being as follows according to similar messages:

errmsg("terminating walreceiver process due to change of primary_conninfo")));

And it *might* be good to have detail.

errdetail("In a moment starts streaming WAL with new configuration.");

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

In reply to: Kyotaro HORIGUCHI (#12)
Re: allow online change primary_conninfo

Hello

Do we no longer need static version of conninfo/slotname in
walreceiver.c? We can detect a change of the variables without
them (as you did in the previous version.).

Depends on this discussion: /messages/by-id/20181212053042.GK17695@paquier.xyz
If walreceiver will use GUC conninfo for startup - then yes, we can remove such static variables.
If walreceiver will still use conninfo from WalRcv - we have race condition between RequestXLogStreaming from startup, config reload, and walreceiver start. In this case i need save conninfo from WalRcv and compare new GUC value with them.

I don't think it is acceptable that raw conninfo is exposed into
log file.

  LOG: parameter "primary_conninfo" changed to "host=/tmp port=5432 password=hoge"

Hmm. We have infrastructure to hide such values?
I need implement something like flag GUC_HIDE_VALUE_FROM_LOG near GUC_SUPERUSER_ONLY in separate thread? Log message will be changed to "LOG: parameter "primary_conninfo" changed" with this flag.

I also think that this is not a big problem. We may already have secret data in the logs. For example, in query text from application.

 errmsg("closing replication connection because primary_conninfo was changed")));

Maybe it is better being as follows according to similar messages:

 errmsg("terminating walreceiver process due to change of primary_conninfo")));

And it *might* be good to have detail.

 errdetail("In a moment starts streaming WAL with new configuration.");

Agreed, will change.

regards, Sergei

#14Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#13)
Re: allow online change primary_conninfo

On Thu, Dec 13, 2018 at 01:51:48PM +0300, Sergei Kornilov wrote:

Depends on this discussion: /messages/by-id/20181212053042.GK17695@paquier.xyz
If walreceiver will use GUC conninfo for startup - then yes, we can
remove such static variables. If walreceiver will still use conninfo
from WalRcv - we have race condition between RequestXLogStreaming from
startup, config reload, and walreceiver start. In this case I need
save conninfo from WalRcv and compare new GUC value with them.

I would recommend waiting for the conclusion of other thread before
moving on with this one. There are arguments for letting the startup
process deciding which connection string the WAL receiver should use as
well as letting the WAL receiver use directly the connection string from
the GUC.

Hmm. We have infrastructure to hide such values?
I need implement something like flag GUC_HIDE_VALUE_FROM_LOG near
GUC_SUPERUSER_ONLY in separate thread? Log message will be changed to
"LOG: parameter "primary_conninfo" changed" with this flag.

Good point. Passwords in logs can be considered as a security issue.
Having such an option may be interesting for other things, though I am
not sure if just having an option to hide logs related to a given
parameter are the correct way to shape it. We could also consider using
the show hook function of a parameter to print its value in such logs.
--
Michael

In reply to: Michael Paquier (#14)
Re: allow online change primary_conninfo

Hi

I would recommend waiting for the conclusion of other thread before
moving on with this one.

Agreed.
I mark this patch as Waiting on Author. Not quite correct for waiting another discussion, but most suitable.

We could also consider using
the show hook function of a parameter to print its value in such logs.

But show hook also affects "show primary_conninfo;" command and pg_settings.value
I already marked primary_conninfo as GUC_SUPERUSER_ONLY. I doubt if we need hide some connection info even from superuser.
Also this hook would be a bit complicated, i found suitable code only in libpqrcv_get_conninfo after establishing connect

regards, Sergei

#16andres@anarazel.de
andres@anarazel.de
In reply to: Michael Paquier (#14)
Re: allow online change primary_conninfo

Hi,

On 2018-12-14 16:55:42 +0900, Michael Paquier wrote:

On Thu, Dec 13, 2018 at 01:51:48PM +0300, Sergei Kornilov wrote:

Depends on this discussion: /messages/by-id/20181212053042.GK17695@paquier.xyz
If walreceiver will use GUC conninfo for startup - then yes, we can
remove such static variables. If walreceiver will still use conninfo
from WalRcv - we have race condition between RequestXLogStreaming from
startup, config reload, and walreceiver start. In this case I need
save conninfo from WalRcv and compare new GUC value with them.

I would recommend waiting for the conclusion of other thread before
moving on with this one. There are arguments for letting the startup
process deciding which connection string the WAL receiver should use as
well as letting the WAL receiver use directly the connection string from
the GUC.

I suggest continuing the development of the patch without relying on
that refactoring.

Greetings,

Andres Freund

In reply to: andres@anarazel.de (#16)
Re: allow online change primary_conninfo

Hello

Yeah, we have no consensus.

Another open question is about logging new primary_conninfo:

LOG: parameter "primary_conninfo" changed to "host=/tmp port=5432 password=hoge"

I my opinion this is not issue, database logs can have sensitive data. User queries, for example.
If we not want expose such info - it is ok just hide new value from logs with new GUC flag? Or i need implement masked conninfo for this purpose?

regards, Sergei

#18Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#17)
Re: allow online change primary_conninfo

On Thu, Jan 31, 2019 at 04:13:22PM +0300, Sergei Kornilov wrote:

I my opinion this is not issue, database logs can have sensitive
data. User queries, for example. If we not want expose such info -
it is ok just hide new value from logs with new GUC flag? Or i need
implement masked conninfo for this purpose?

You have problems with things in this area for any commands logged and
able to show a connection string or a password, which can go down as
well to CREATE/ALTER ROLE or FDWs. So for the purpose of what's
discussed on this thread it does not sound like a requirement to be
able to hide that. Role DDLs can take an already-hashed input to
avoid that, still knowing the MD5 hash is sufficient for connection
(not for SCRAM!). Now for FDWs..
--
Michael

#19andres@anarazel.de
andres@anarazel.de
In reply to: Sergei Kornilov (#17)
Re: allow online change primary_conninfo

Hi,

On 2019-01-31 16:13:22 +0300, Sergei Kornilov wrote:

Hello

Yeah, we have no consensus.

Are you planning to update the patch? Given there's not been much
progress here, I think we ough tot mark the CF entry as returned with
feedback for now.

Another open question is about logging new primary_conninfo:

LOG: parameter "primary_conninfo" changed to "host=/tmp port=5432 password=hoge"

I my opinion this is not issue, database logs can have sensitive data. User queries, for example.
If we not want expose such info - it is ok just hide new value from logs with new GUC flag? Or i need implement masked conninfo for this purpose?

I agree that this doesn't need to be solved as part of this patch. Given
the config is in the conf file, I don't think it's meaningful to hide
this from the log. If necessary one can use client certs, service files,
etc.

Greetings,

Andres Freund

In reply to: andres@anarazel.de (#19)
1 attachment(s)
Re: allow online change primary_conninfo

Hi

I agree that this doesn't need to be solved as part of this patch.

Thank you!

Are you planning to update the patch?

Sorry, i was busy last month. But, well, i already did such update: /messages/by-id/9653601544523383@iva8-37fc2ad204cd.qloud-c.yandex.net
v003 version does not change RequestXLogStreaming, not require "Making WAL receiver startup rely on GUC context" patch and does not hide new value from logs.

Here is updated version (from v003 patch) with few wording changes suggested by Kyotaro HORIGUCHI and Michael Paquier

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v004.patchtext/x-diff; name=reload_walreceiver_conninfo_v004.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 9b7a7388d5..af68afa715 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3827,9 +3827,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_conninfo</varname>
+          was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3844,9 +3849,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 2e90944ad5..05b90d00a4 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -112,6 +112,12 @@ static struct
 static StringInfoData reply_message;
 static StringInfoData incoming_message;
 
+/*
+ * Copy of current WalReceiverConn connection info (not clobbered)
+ */
+static char current_conninfo[MAXCONNINFO];
+static char current_slotname[NAMEDATALEN];
+
 /*
  * About SIGTERM handling:
  *
@@ -143,6 +149,7 @@ static void XLogWalRcvFlush(bool dying);
 static void XLogWalRcvSendReply(bool force, bool requestReply);
 static void XLogWalRcvSendHSFeedback(bool immed);
 static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
+static void ProcessWalRcvSigHup(void);
 
 /* Signal handlers */
 static void WalRcvSigHupHandler(SIGNAL_ARGS);
@@ -188,9 +195,7 @@ DisableWalRcvImmediateExit(void)
 void
 WalReceiverMain(void)
 {
-	char		conninfo[MAXCONNINFO];
 	char	   *tmp_conninfo;
-	char		slotname[NAMEDATALEN];
 	XLogRecPtr	startpoint;
 	TimeLineID	startpointTLI;
 	TimeLineID	primaryTLI;
@@ -250,8 +255,8 @@ WalReceiverMain(void)
 
 	/* Fetch information required to start streaming */
 	walrcv->ready_to_display = false;
-	strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
-	strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
+	strlcpy(current_conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
+	strlcpy(current_slotname, (char *) walrcv->slotname, NAMEDATALEN);
 	startpoint = walrcv->receiveStart;
 	startpointTLI = walrcv->receiveStartTLI;
 
@@ -293,7 +298,7 @@ WalReceiverMain(void)
 
 	/* Establish the connection to the primary for XLOG streaming */
 	EnableWalRcvImmediateExit();
-	wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
+	wrconn = walrcv_connect(current_conninfo, false, "walreceiver", &err);
 	if (!wrconn)
 		ereport(ERROR,
 				(errmsg("could not connect to the primary server: %s", err)));
@@ -387,7 +392,7 @@ WalReceiverMain(void)
 		 */
 		options.logical = false;
 		options.startpoint = startpoint;
-		options.slotname = slotname[0] != '\0' ? slotname : NULL;
+		options.slotname = current_slotname[0] != '\0' ? current_slotname : NULL;
 		options.proto.physical.startpointTLI = startpointTLI;
 		ThisTimeLineID = startpointTLI;
 		if (walrcv_startstreaming(wrconn, &options))
@@ -436,8 +441,7 @@ WalReceiverMain(void)
 				if (got_SIGHUP)
 				{
 					got_SIGHUP = false;
-					ProcessConfigFile(PGC_SIGHUP);
-					XLogWalRcvSendHSFeedback(true);
+					ProcessWalRcvSigHup();
 				}
 
 				/* See if we can read data immediately */
@@ -1316,6 +1320,37 @@ ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime)
 	}
 }
 
+/*
+ * Actual processing SIGHUP signal
+ */
+static void
+ProcessWalRcvSigHup(void)
+{
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * If primary_conninfo has been changed while walreceiver is running,
+	 * shut down walreceiver so that a new walreceiver is started and
+	 * initiates replication with the new connection information.
+	 */
+	if (strcmp(current_conninfo, PrimaryConnInfo) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("terminating walreceiver process due to change of primary_conninfo"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+	/*
+	 * And the same for primary_slot_name.
+	 */
+	if (strcmp(current_slotname, PrimarySlotName) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("terminating walreceiver process due to change of primary_slot_name"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+	XLogWalRcvSendHSFeedback(true);
+}
+
 /*
  * Wake up the walreceiver main loop.
  *
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8681ada33a..ab3cff2620 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3484,7 +3484,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3495,7 +3495,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb45551a2..07ac9642ba 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +284,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#21Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#20)
Re: allow online change primary_conninfo

Hi,

On 2019-02-03 15:33:38 +0300, Sergei Kornilov wrote:

+/*
+ * Actual processing SIGHUP signal
+ */
+static void
+ProcessWalRcvSigHup(void)
+{
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * If primary_conninfo has been changed while walreceiver is running,
+	 * shut down walreceiver so that a new walreceiver is started and
+	 * initiates replication with the new connection information.
+	 */
+	if (strcmp(current_conninfo, PrimaryConnInfo) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("terminating walreceiver process due to change of primary_conninfo"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+	/*
+	 * And the same for primary_slot_name.
+	 */
+	if (strcmp(current_slotname, PrimarySlotName) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_ADMIN_SHUTDOWN),
+				 errmsg("terminating walreceiver process due to change of primary_slot_name"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+	XLogWalRcvSendHSFeedback(true);
+}

I don't quite think this is the right design. IMO the startup process
should signal the walreceiver to shut down, and the wal receiver should
never look at the config. Otherwise there's chances for knowledge of
pg.conf to differ between the processes.

Greetings,

Andres Freund

In reply to: Andres Freund (#21)
1 attachment(s)
Re: allow online change primary_conninfo

Hi

I don't quite think this is the right design. IMO the startup process
should signal the walreceiver to shut down, and the wal receiver should
never look at the config.

Ok, i can rewrite such way. Please check attached version.

Otherwise there's chances for knowledge of
pg.conf to differ between the processes.

I still not understood why this:
- is not issue for startup process with all primary_conninfo logic
- but issue for walreceiver with Michael Paquier patch; therefore without any primary_conninfo change logic in startup.
In both cases primary_conninfo change logic is only in one process.

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v005.patchtext/x-diff; name=reload_walreceiver_conninfo_v005.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8bd57f376b..d7b75e462a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3838,9 +3838,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_conninfo</varname>
+          was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3855,9 +3860,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ecd12fc53a..6a64a27a51 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12137,6 +12137,36 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * we need shutdown running walreceiver if replication settings was
+	 * changed. walreceiver will be started with new settings in next
+	 * WaitForWALToBecomeAvailable iteration
+	 */
+	if ((strcmp(conninfo, PrimaryConnInfo) != 0 ||
+		 strcmp(slotname, PrimarySlotName) != 0) &&
+		WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("terminating walreceiver process due to change of %s",
+						strcmp(conninfo, PrimaryConnInfo) != 0 ? "primary_conninfo" : "primary_slot_name"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+		ShutdownWalRcv();
+		lastSourceFailed = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..9bf5c792fe 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -147,7 +147,7 @@ HandleStartupProcInterrupts(void)
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 156d147c85..87bd7443ef 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3485,7 +3485,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3496,7 +3496,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f90a6a9139..75ec605a3f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -318,6 +318,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb45551a2..07ac9642ba 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +284,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#23Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#22)
Re: allow online change primary_conninfo

On Sat, Feb 16, 2019 at 02:50:35PM +0300, Sergei Kornilov wrote:

+	if ((strcmp(conninfo, PrimaryConnInfo) != 0 ||
+		 strcmp(slotname, PrimarySlotName) != 0) &&
+		WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("terminating walreceiver process due to change of %s",
+						strcmp(conninfo, PrimaryConnInfo) != 0 ? "primary_conninfo" : "primary_slot_name"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+		ShutdownWalRcv();
+		lastSourceFailed = true;
+	}

If you do that, the startup process would try to jump to a different
source to fetch WAL if archiving is enabled. Is that really what we
want? It would be nice to have one message for primary_conninfo being
updated, and one for primary_slot_name so as if both are changed at
the same time the user gets the correct information.

"In a moment starts streaming WAL with new configuration." sounds
strange. It would be more natural to have something like "The WAL
receiver is going to be restarted with the new configuration", just a
suggestion.
--
Michael

In reply to: Michael Paquier (#23)
1 attachment(s)
Re: allow online change primary_conninfo

Hi

If you do that, the startup process would try to jump to a different
source to fetch WAL if archiving is enabled. Is that really what we
want?

Yes, similar behavior with exit walreceiver by any reason. Same as in all previous patch versions.
(not sure this can be changed in some small and elegant way)

It would be nice to have one message for primary_conninfo being
updated, and one for primary_slot_name so as if both are changed at
the same time the user gets the correct information.

Hm, maybe even better would be separate message if both settings are changed? Doing this in attached patch.

"In a moment starts streaming WAL with new configuration." sounds
strange. It would be more natural to have something like "The WAL
receiver is going to be restarted with the new configuration", just a
suggestion.

This phrase was proposed here: /messages/by-id/20181213.184233.215171075.horiguchi.kyotaro@lab.ntt.co.jp
My english is poor, so i added message just as proposed. I replaced to your variant.

regards, Sergei

Attachments:

reload_walreceiver_conninfo_v006.patchtext/x-diff; name=reload_walreceiver_conninfo_v006.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8bd57f376b..d7b75e462a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3838,9 +3838,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_conninfo</varname>
+          was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3855,9 +3860,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ecd12fc53a..8a77ebb0d3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12137,6 +12137,44 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * we need shutdown running walreceiver if replication settings was
+	 * changed. walreceiver will be started with new settings in next
+	 * WaitForWALToBecomeAvailable iteration
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) && WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("terminating walreceiver process due to change of primary_conninfo and primary_slot_name"),
+					 errdetail("The WAL receiver is going to be restarted with the new configuration")));
+		else
+			ereport(LOG,
+					(errmsg("terminating walreceiver process due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name"),
+					 errdetail("The WAL receiver is going to be restarted with the new configuration")));
+
+		ShutdownWalRcv();
+		lastSourceFailed = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..9bf5c792fe 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -147,7 +147,7 @@ HandleStartupProcInterrupts(void)
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 156d147c85..87bd7443ef 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3485,7 +3485,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3496,7 +3496,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f90a6a9139..75ec605a3f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -318,6 +318,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb45551a2..07ac9642ba 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +284,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#25Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#22)
Re: allow online change primary_conninfo

Hi,

On 2019-02-16 14:50:35 +0300, Sergei Kornilov wrote:

I don't quite think this is the right design. IMO the startup process
should signal the walreceiver to shut down, and the wal receiver should
never look at the config.

Ok, i can rewrite such way. Please check attached version.

Otherwise there's chances for knowledge of
pg.conf to differ between the processes.

I still not understood why this:
- is not issue for startup process with all primary_conninfo logic
- but issue for walreceiver with Michael Paquier patch; therefore without any primary_conninfo change logic in startup.
In both cases primary_conninfo change logic is only in one process.

regards, Sergei

For one, it's not ok to just let the startup process think that the
walreceiver failed - that'll make it change source of WAL to the next
available method. Something we definitely do not want, as
restore_command is very commonly slower.

But just as importantly, I think, is that we ought to automate
cluster-wide tasks like failing over more inside postgres. And that's
much harder if different parts of PG, say the startup process and
walreceiver, have different assumptions about the current configuration.

+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * we need shutdown running walreceiver if replication settings was
+	 * changed. walreceiver will be started with new settings in next
+	 * WaitForWALToBecomeAvailable iteration
+	 */
+	if ((strcmp(conninfo, PrimaryConnInfo) != 0 ||
+		 strcmp(slotname, PrimarySlotName) != 0) &&
+		WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("terminating walreceiver process due to change of %s",
+						strcmp(conninfo, PrimaryConnInfo) != 0 ? "primary_conninfo" : "primary_slot_name"),
+				 errdetail("In a moment starts streaming WAL with new configuration.")));
+
+		ShutdownWalRcv();
+		lastSourceFailed = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}

That'd still switch to a different method, something we do not want...

Greetings,

Andres Freund

In reply to: Andres Freund (#25)
Re: allow online change primary_conninfo

Hi

For one, it's not ok to just let the startup process think that the
walreceiver failed - that'll make it change source of WAL to the next
available method. Something we definitely do not want, as
restore_command is very commonly slower.

Ok. This was not mentioned before Michael response yesterday. restore_command is much faster compared to database restart, also switch to a different method was proposed few years ago by Simon Riggs in original change recovery.conf proposal ( /messages/by-id/CANP8+jLO5fmfudbB1b1iw3pTdOK1HBM=xMTaRfOa5zpDVcqzew@mail.gmail.com/ ). I assumed we can start with this. Sorry for your wasted time.

That'd still switch to a different method, something we do not want...

Ok, do not want means we do not want. Will try change behavior.

regards, Sergei

#27Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#26)
Re: allow online change primary_conninfo

On Mon, Feb 18, 2019 at 12:06:05AM +0300, Sergei Kornilov wrote:

Ok. This was not mentioned before Michael response
yesterday. restore_command is much faster compared to database
restart [...]

The startup process would not switch back to streaming with archiving
enabled until it has consumed all the segments available. I recall
David Steele mentioning me that a perl command invocation can take
easily 100ms. On a system which generates more than 10 segments per
segment, you can guess what happens.. I think that this argument is a
non-starter if we want to make the WAL receiver a maximum available.

That'd still switch to a different method, something we do not want...

Ok, do not want means we do not want. Will try change behavior.

Thanks Sergei for considering it. The code paths touched are
sensitive, so we had better be careful here.
--
Michael

In reply to: Andres Freund (#25)
2 attachment(s)
Re: allow online change primary_conninfo

Hello

This might be not the right way, but I can't think of a better way to not switch to a different method than split of lastSourceFailed processing and starting new source. Something like refactoring in first attached patch. I move RequestXLogStreaming logic from lastSourceFailed processing and add new pendingRestartSource indicator.
Second patch is mostly the same as previous version but uses new pendingRestartSource mechanism instead of lastSourceFailed.

Thanks in advance!

regards, Sergei

Attachments:

0001_refactor_WaitForWALToBecomeAvailable_to_graceful_restart_source.patchtext/x-diff; name=0001_refactor_WaitForWALToBecomeAvailable_to_graceful_restart_source.patchDownload
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ecd12fc53a..daddaeb236 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -798,10 +798,11 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
  * different from readSource in that this is always set, even when we don't
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
- * next.
+ * next. If pendingRestartSource is set we want restart current source
  */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
+static bool pendingRestartSource = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11828,48 +11829,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
@@ -11969,10 +11928,83 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * Prepare to read from the chosen source if we asked restart source
+		 * or last source was failed
+		 */
+		if (pendingRestartSource || lastSourceFailed)
+		{
+			/*
+			 * make sure that walreceiver is not active. this is needed for
+			 * all supported sources
+			 */
+			if (WalRcvRunning())
+				ShutdownWalRcv();
+
+			switch (currentSource)
+			{
+				case XLOG_FROM_ARCHIVE:
+				case XLOG_FROM_PG_WAL:
+
+					/*
+					 * We do not need additional actions here
+					 */
+					break;
+
+				case XLOG_FROM_STREAM:
+
+					/*
+					 * If primary_conninfo is set, launch walreceiver to try
+					 * to stream the missing WAL.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+					break;
+				default:
+					elog(ERROR, "unexpected WAL source %d", currentSource);
+			}
+		}
+
+		/*
+		 * We've now handled possible failure and pending source restart. Try
+		 * to read from the chosen source.
 		 */
 		lastSourceFailed = false;
+		pendingRestartSource = false;
 
 		switch (currentSource)
 		{
0002_restart_stream_source_on_conf_change.patchtext/x-diff; name=0002_restart_stream_source_on_conf_change.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6d42b7afe7..bf37cc4637 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3839,9 +3839,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_conninfo</varname>
+          was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3856,9 +3861,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index daddaeb236..156954b7d9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12169,6 +12169,42 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart XLOG_FROM_STREAM source if replication settings was
+	 * changed
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingRestartSource = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..9bf5c792fe 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -147,7 +147,7 @@ HandleStartupProcInterrupts(void)
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 156d147c85..87bd7443ef 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3485,7 +3485,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3496,7 +3496,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f90a6a9139..75ec605a3f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -318,6 +318,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb45551a2..07ac9642ba 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 27;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -146,7 +146,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -282,3 +284,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#29Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#28)
Re: allow online change primary_conninfo

On Sat, Mar 02, 2019 at 01:49:51PM +0300, Sergei Kornilov wrote:

This might be not the right way, but I can't think of a better way
to not switch to a different method than split of lastSourceFailed
processing and starting new source. Something like refactoring in
first attached patch. I move RequestXLogStreaming logic from
lastSourceFailed processing and add new pendingRestartSource
indicator.

OK. This needs a rather close lookup, and I am not actually sure that
you even need pendingRestartSource. Please let me a couple of days
before coming back to you on 0001.

Second patch is mostly the same as previous version but uses new
pendingRestartSource mechanism instead of lastSourceFailed.

This, on the other hand, looks like the implementation we are looking
for based on the discussions which happened until now to have the
startup process handle the shutdown and restart of the WAL receiver.
--
Michael

#30David Steele
david@pgmasters.net
In reply to: Michael Paquier (#29)
Re: Re: allow online change primary_conninfo

Hi Michael,

On 3/4/19 7:19 AM, Michael Paquier wrote:

On Sat, Mar 02, 2019 at 01:49:51PM +0300, Sergei Kornilov wrote:

This might be not the right way, but I can't think of a better way
to not switch to a different method than split of lastSourceFailed
processing and starting new source. Something like refactoring in
first attached patch. I move RequestXLogStreaming logic from
lastSourceFailed processing and add new pendingRestartSource
indicator.

OK. This needs a rather close lookup, and I am not actually sure that
you even need pendingRestartSource. Please let me a couple of days
before coming back to you on 0001.

Second patch is mostly the same as previous version but uses new
pendingRestartSource mechanism instead of lastSourceFailed.

This, on the other hand, looks like the implementation we are looking
for based on the discussions which happened until now to have the
startup process handle the shutdown and restart of the WAL receiver.

Do you know when you'll have a chance to revisit this?

Regards,
--
-David
david@pgmasters.net

#31Andres Freund
andres@anarazel.de
In reply to: David Steele (#30)
Re: allow online change primary_conninfo

Hi,

On 2019-03-25 12:32:21 +0400, David Steele wrote:

On 3/4/19 7:19 AM, Michael Paquier wrote:

On Sat, Mar 02, 2019 at 01:49:51PM +0300, Sergei Kornilov wrote:

This might be not the right way, but I can't think of a better way
to not switch to a different method than split of lastSourceFailed
processing and starting new source. Something like refactoring in
first attached patch. I move RequestXLogStreaming logic from
lastSourceFailed processing and add new pendingRestartSource
indicator.

OK. This needs a rather close lookup, and I am not actually sure that
you even need pendingRestartSource. Please let me a couple of days
before coming back to you on 0001.

Second patch is mostly the same as previous version but uses new
pendingRestartSource mechanism instead of lastSourceFailed.

This, on the other hand, looks like the implementation we are looking
for based on the discussions which happened until now to have the
startup process handle the shutdown and restart of the WAL receiver.

Do you know when you'll have a chance to revisit this?

I think we unfortunately got to mark this as returned with
feedback. I've not done so, but just switched the entry to waiting on
author.

Greetings,

Andres Freund

In reply to: Andres Freund (#31)
Re: allow online change primary_conninfo

Hi

I think we unfortunately got to mark this as returned with
feedback. I've not done so, but just switched the entry to waiting on
author.

Why returned with feedback? Why waiting on author? I didn't receive a feedback for latest published patch version. What can I do as author? Patch still applied (thanks cf bot)
Obviously too late for pg12, but why can not be target pg13 and therefore be moved to next CF?

regards, Sergei

#33Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#32)
Re: allow online change primary_conninfo

Hi,

On 2019-04-03 23:49:54 +0300, Sergei Kornilov wrote:

I think we unfortunately got to mark this as returned with
feedback. I've not done so, but just switched the entry to waiting on
author.

Why returned with feedback? Why waiting on author? I didn't receive a
feedback for latest published patch version. What can I do as author?
Patch still applied (thanks cf bot) Obviously too late for pg12, but
why can not be target pg13 and therefore be moved to next CF?

Well, my impression was that the patch didn't yet really address the
feedback. And thus should have been marked as waiting on author for a
while.

Greetings,

Andres Freund

In reply to: Andres Freund (#33)
Re: allow online change primary_conninfo

Hi

 > I think we unfortunately got to mark this as returned with
 > feedback. I've not done so, but just switched the entry to waiting on
 > author.

 Why returned with feedback? Why waiting on author? I didn't receive a
 feedback for latest published patch version. What can I do as author?
 Patch still applied (thanks cf bot) Obviously too late for pg12, but
 why can not be target pg13 and therefore be moved to next CF?

Well, my impression was that the patch didn't yet really address the
feedback. And thus should have been marked as waiting on author for a
while.

Not agree. Latest patch version perform walreceiver restart without switch to a different method as discussed. Here is no race condition between startup process and walreceiver because conninfo passed via WalRcvData struct as currently. I miss something important?
Michael Paquier had no possibility to review latest implementation, but did not say this is totally wrong, just asked wait a rather close lookup.

Of cource we can close this cf entry. I would be happy if someone else post proper implementation. And I can rework my implementation again, but I don’t know how the correct implementation should look or why latest implementation is wrong.

regards, Sergei

#35Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#34)
Re: allow online change primary_conninfo

On Thu, Apr 04, 2019 at 12:27:55AM +0300, Sergei Kornilov wrote:

Not agree. Latest patch version perform walreceiver restart without
switch to a different method as discussed. Here is no race condition
between startup process and walreceiver because conninfo passed via
WalRcvData struct as currently. I miss something important?
Michael Paquier had no possibility to review latest implementation,
but did not say this is totally wrong, just asked wait a rather
close lookup.

I agree with Sergei's statement here. He has sent a patch for review,
which I have looked up a bit, but not enough to provide a full review
unfortunately, and this even if I was listed as a reviewer of it. So
if somebody is to blame here, that's me.

Of cource we can close this cf entry. I would be happy if someone
else post proper implementation. And I can rework my implementation
again, but I don’t know how the correct implementation should look
or why latest implementation is wrong.

Moving this patch to next CF is fine.
--
Michael

#36Andres Freund
andres@anarazel.de
In reply to: Michael Paquier (#35)
Re: allow online change primary_conninfo

On 2019-04-04 11:06:05 +0900, Michael Paquier wrote:

On Thu, Apr 04, 2019 at 12:27:55AM +0300, Sergei Kornilov wrote:

Not agree. Latest patch version perform walreceiver restart without
switch to a different method as discussed. Here is no race condition
between startup process and walreceiver because conninfo passed via
WalRcvData struct as currently. I miss something important?
Michael Paquier had no possibility to review latest implementation,
but did not say this is totally wrong, just asked wait a rather
close lookup.

I agree with Sergei's statement here. He has sent a patch for review,
which I have looked up a bit, but not enough to provide a full review
unfortunately, and this even if I was listed as a reviewer of it. So
if somebody is to blame here, that's me.

Of cource we can close this cf entry. I would be happy if someone
else post proper implementation. And I can rework my implementation
again, but I don’t know how the correct implementation should look
or why latest implementation is wrong.

Moving this patch to next CF is fine.

Cool, sorry for the misunderstanding then.

In reply to: Michael Paquier (#35)
2 attachment(s)
Re: allow online change primary_conninfo

Hello

Updated version attached. Merge conflict was about tests count in 001_stream_rep.pl. Nothing else was changed. My approach can be still incorrect, any redesign ideas are welcome. Thanks in advance!

regards, Sergei

Attachments:

0002_v2_restart_stream_source_on_conf_change.patchtext/x-diff; name=0002_v2_restart_stream_source_on_conf_change.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 84341a30e5..054be17e08 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3916,9 +3916,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_conninfo</varname>
+          was changed.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3933,9 +3938,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3e2c4e3e5b..964989432c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12125,6 +12125,42 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart XLOG_FROM_STREAM source if replication settings was
+	 * changed
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingRestartSource = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..9bf5c792fe 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -147,7 +147,7 @@ HandleStartupProcInterrupts(void)
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 92c4fee8f8..e54d8e7172 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3571,7 +3571,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3582,7 +3582,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
0001_v2_refactor_WaitForWALToBecomeAvailable_to_graceful_restart_source.patchtext/x-diff; name=0001_v2_refactor_WaitForWALToBecomeAvailable_to_graceful_restart_source.patchDownload
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 13e0d2366f..3e2c4e3e5b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -798,10 +798,11 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
  * different from readSource in that this is always set, even when we don't
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
- * next.
+ * next. If pendingRestartSource is set we want restart current source
  */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
+static bool pendingRestartSource = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11784,48 +11785,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
@@ -11925,10 +11884,83 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * Prepare to read from the chosen source if we asked restart source
+		 * or last source was failed
+		 */
+		if (pendingRestartSource || lastSourceFailed)
+		{
+			/*
+			 * make sure that walreceiver is not active. this is needed for
+			 * all supported sources
+			 */
+			if (WalRcvRunning())
+				ShutdownWalRcv();
+
+			switch (currentSource)
+			{
+				case XLOG_FROM_ARCHIVE:
+				case XLOG_FROM_PG_WAL:
+
+					/*
+					 * We do not need additional actions here
+					 */
+					break;
+
+				case XLOG_FROM_STREAM:
+
+					/*
+					 * If primary_conninfo is set, launch walreceiver to try
+					 * to stream the missing WAL.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+					break;
+				default:
+					elog(ERROR, "unexpected WAL source %d", currentSource);
+			}
+		}
+
+		/*
+		 * We've now handled possible failure and pending source restart. Try
+		 * to read from the chosen source.
 		 */
 		lastSourceFailed = false;
+		pendingRestartSource = false;
 
 		switch (currentSource)
 		{
#38Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#37)
Re: allow online change primary_conninfo

On Mon, Jul 01, 2019 at 02:33:39PM +0300, Sergei Kornilov wrote:

Updated version attached. Merge conflict was about tests count in
001_stream_rep.pl. Nothing else was changed. My approach can be
still incorrect, any redesign ideas are welcome. Thanks in advance!

It has been some time, and I am finally catching up with this patch.

+         <para>
+          WAL receiver will be restarted after <varname>primary_slot_name</varname>
+          was changed.
          </para>
The sentence sounds strange.  Here is a suggestion:
The WAL receiver is restarted after an update of primary_slot_name (or
primary_conninfo).

The comment at the top of the call of ProcessStartupSigHup() in
HandleStartupProcInterrupts() needs to be updated as it mentions a
configuration file re-read, but that's not the case anymore.

pendingRestartSource's name does not represent what it does, as it is
only associated with the restart of a WAL receiver when in streaming
state, and that's a no-op for the archive mode and the local mode.

So, the patch splits the steps taken when checking for a WAL source by
adding an extra step after the failure handling that you are calling
the restart step. When a failure happens for the stream mode
(shutdown of WAL receiver, promotion. etc), there is a switch to the
archive mode, and nothing is changed in this case in your patch. So
when shutting down the WAL receiver after a parameter change, what
happens is that the startup process waits for retrieve_retry_interval
before moving back to the archive mode. Only after scanning again the
archives do we restart a new WAL receiver. However, if the restart of
the WAL receiver is planned because of an update of primary_conninfo
(or slot), shouldn't the follow-up mode be XLOG_FROM_STREAM without
waiting for wal_retrieve_retry_interval ms for extra WAL to become
available?
--
Michael

#39Thomas Munro
thomas.munro@gmail.com
In reply to: Michael Paquier (#38)
Re: allow online change primary_conninfo

On Tue, Jul 30, 2019 at 6:42 PM Michael Paquier <michael@paquier.xyz> wrote:

On Mon, Jul 01, 2019 at 02:33:39PM +0300, Sergei Kornilov wrote:

Updated version attached. Merge conflict was about tests count in
001_stream_rep.pl. Nothing else was changed. My approach can be
still incorrect, any redesign ideas are welcome. Thanks in advance!

[review]

Unfortunately this comes right that the end of the CF, but the good
news is that there is another one just around the corner. Moved to
September.

--
Thomas Munro
https://enterprisedb.com

#40Michael Paquier
michael@paquier.xyz
In reply to: Thomas Munro (#39)
Re: allow online change primary_conninfo

On Thu, Aug 01, 2019 at 09:06:59PM +1200, Thomas Munro wrote:

Unfortunately this comes right that the end of the CF, but the good
news is that there is another one just around the corner. Moved to
September.

Moving it to the next CF is fine, thanks. The author had no time to
reply since my last lookup.
--
Michael

In reply to: Michael Paquier (#38)
Re: allow online change primary_conninfo

Hello

It has been some time, and I am finally catching up with this patch.

Thank you!

+ <para>
+ WAL receiver will be restarted after <varname>primary_slot_name</varname>
+ was changed.
          </para>
The sentence sounds strange. Here is a suggestion:
The WAL receiver is restarted after an update of primary_slot_name (or
primary_conninfo).

The comment at the top of the call of ProcessStartupSigHup() in
HandleStartupProcInterrupts() needs to be updated as it mentions a
configuration file re-read, but that's not the case anymore.

I agree. But I want to know if this is an acceptable idea in general, before spending a few more months polishing the text on something we don’t want.

pendingRestartSource's name does not represent what it does, as it is
only associated with the restart of a WAL receiver when in streaming
state, and that's a no-op for the archive mode and the local mode.

Currently we have only one source with such operation, so I can name variable pendingRestartWalreceiver and replace corresponding switch to compact condition. If someday we need additional actions for another wal source - we can replace this code. Looks like some form of premature optimization from me

However, if the restart of
the WAL receiver is planned because of an update of primary_conninfo
(or slot), shouldn't the follow-up mode be XLOG_FROM_STREAM without
waiting for wal_retrieve_retry_interval ms for extra WAL to become
available?

Hmm. Standby with log_min_messages = debug2 and default wal_retrieve_retry_interval = 5s gives me:

2019-08-01 12:38:31.255 MSK 15707 @ from [vxid: txid:0] [] DEBUG: sendtime 2019-08-01 12:38:31.254905+03 receipttime 2019-08-01 12:38:31.254986+03 replication apply delay 0 ms transfer latency 0 ms
2019-08-01 12:38:31.255 MSK 15707 @ from [vxid: txid:0] [] DEBUG: sending write 0/3023FA8 flush 0/3023F38 apply 0/3023F38
2019-08-01 12:38:31.262 MSK 15707 @ from [vxid: txid:0] [] DEBUG: sending write 0/3023FA8 flush 0/3023FA8 apply 0/3023F38
2019-08-01 12:38:31.262 MSK 15707 @ from [vxid: txid:0] [] DEBUG: sending write 0/3023FA8 flush 0/3023FA8 apply 0/3023FA8
2019-08-01 12:38:31.457 MSK 15699 @ from [vxid: txid:0] [] LOG: received SIGHUP, reloading configuration files
2019-08-01 12:38:31.458 MSK 15699 @ from [vxid: txid:0] [] LOG: parameter "primary_conninfo" changed to " host='/tmp' port=5555"
2019-08-01 12:38:31.459 MSK 15700 @ from [vxid:1/0 txid:0] [] LOG: The WAL receiver is going to be restarted due to change of primary_conninfo
2019-08-01 12:38:31.459 MSK 15703 @ from [vxid: txid:0] [] DEBUG: checkpointer updated shared memory configuration values
2019-08-01 12:38:31.459 MSK 15707 @ from [vxid: txid:0] [] FATAL: terminating walreceiver process due to administrator command
2019-08-01 12:38:31.463 MSK 15724 @ from [vxid: txid:0] [] LOG: started streaming WAL from primary at 0/3000000 on timeline 1
2019-08-01 12:38:31.464 MSK 15724 @ from [vxid: txid:0] [] DEBUG: sendtime 2019-08-01 12:38:31.463954+03 receipttime 2019-08-01 12:38:31.464228+03 replication apply delay 0 ms transfer latency 0 ms
2019-08-01 12:38:31.464 MSK 15724 @ from [vxid: txid:0] [] DEBUG: sendtime 2019-08-01 12:38:31.464068+03 receipttime 2019-08-01 12:38:31.464375+03 replication apply delay 0 ms transfer latency 0 ms
2019-08-01 12:38:31.464 MSK 15724 @ from [vxid: txid:0] [] DEBUG: sending write 0/3023FA8 flush 0/3023FA8 apply 0/3023FA8

No source switch, no wal_retrieve_retry_interval waiting. (wal writes on primary by \watch 0.3 query)
RequestXLogStreaming set walRcvState to WALRCV_STARTING - correct state for WalRcvStreaming and therefore we have no lastSourceFailed.

regards, Sergei

In reply to: Michael Paquier (#38)
1 attachment(s)
Re: allow online change primary_conninfo

Hello

Updated patch attached. (also I merged into one file)

+ <para>
+ WAL receiver will be restarted after <varname>primary_slot_name</varname>
+ was changed.
          </para>
The sentence sounds strange. Here is a suggestion:
The WAL receiver is restarted after an update of primary_slot_name (or
primary_conninfo).

Changed.

The comment at the top of the call of ProcessStartupSigHup() in
HandleStartupProcInterrupts() needs to be updated as it mentions a
configuration file re-read, but that's not the case anymore.

Changed to "Process any requests or signals received recently." like in other places, e.g. syslogger

pendingRestartSource's name does not represent what it does, as it is
only associated with the restart of a WAL receiver when in streaming
state, and that's a no-op for the archive mode and the local mode.

I renamed to pendingWalRcvRestart and replaced switch with simple condition.

So when shutting down the WAL receiver after a parameter change, what
happens is that the startup process waits for retrieve_retry_interval
before moving back to the archive mode. Only after scanning again the
archives do we restart a new WAL receiver.

As I answered earlier, here is no switch to archive or wal_retrieve_retry_interval waiting in my patch. I recheck on current revision too:

2019-08-28 12:16:27.295 MSK 11180 @ from [vxid: txid:0] [] DEBUG: sending write 0/30346C8 flush 0/30346C8 apply 0/30346C8
2019-08-28 12:16:27.493 MSK 11172 @ from [vxid: txid:0] [] LOG: received SIGHUP, reloading configuration files
2019-08-28 12:16:27.494 MSK 11172 @ from [vxid: txid:0] [] LOG: parameter "primary_conninfo" changed to "host='/tmp' port=5555 sslmode=disable sslcompression=0 gssencmode=disable target_session_attrs=any"
2019-08-28 12:16:27.496 MSK 11173 @ from [vxid:1/0 txid:0] [] LOG: The WAL receiver is going to be restarted due to change of primary_conninfo
2019-08-28 12:16:27.496 MSK 11176 @ from [vxid: txid:0] [] DEBUG: checkpointer updated shared memory configuration values
2019-08-28 12:16:27.496 MSK 11180 @ from [vxid: txid:0] [] FATAL: terminating walreceiver process due to administrator command
2019-08-28 12:16:27.500 MSK 11335 @ from [vxid: txid:0] [] LOG: started streaming WAL from primary at 0/3000000 on timeline 1
2019-08-28 12:16:27.500 MSK 11335 @ from [vxid: txid:0] [] DEBUG: sendtime 2019-08-28 12:16:27.50037+03 receipttime 2019-08-28 12:16:27.500821+03 replication apply delay 0 ms transfer latency 0 ms

No "DEBUG: switched WAL source from stream to archive after failure" messages, no time difference (wal_retrieve_retry_interval = 5s).

regards, Sergei

Attachments:

v3_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=v3_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 89284dc5c0..b36749fe91 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3929,9 +3929,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_conninfo</varname>.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3946,9 +3950,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e651a841bb..4eed462f34 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11787,48 +11793,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
@@ -11928,10 +11892,66 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * Request walreceiver to start if we switch from another source or if
+		 * we need to change walreceiver connection configuration.
+		 */
+		if (currentSource == XLOG_FROM_STREAM && (lastSourceFailed || pendingWalRcvRestart))
+		{
+			/*
+			 * Ensure walreceiver is not running
+			 */
+			if (WalRcvRunning())
+				ShutdownWalRcv();
+
+			/*
+			 * If primary_conninfo is set, launch walreceiver to try to stream
+			 * the missing WAL.
+			 *
+			 * If fetching_ckpt is true, RecPtr points to the initial
+			 * checkpoint location. In that case, we use RedoStartLSN as the
+			 * streaming start position instead of RecPtr, so that when we
+			 * later jump backwards to start redo at RedoStartLSN, we will
+			 * have the logs streamed already.
+			 */
+			if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+			{
+				XLogRecPtr	ptr;
+				TimeLineID	tli;
+
+				if (fetching_ckpt)
+				{
+					ptr = RedoStartLSN;
+					tli = ControlFile->checkPointCopy.ThisTimeLineID;
+				}
+				else
+				{
+					ptr = RecPtr;
+
+					/*
+					 * Use the record begin position to determine the TLI,
+					 * rather than the position we're reading.
+					 */
+					tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+					if (curFileTLI > 0 && tli < curFileTLI)
+						elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+							 (uint32) (tliRecPtr >> 32),
+							 (uint32) tliRecPtr,
+							 tli, curFileTLI);
+				}
+				curFileTLI = tli;
+				RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+									 PrimarySlotName);
+				receivedUpto = 0;
+			}
+		}
+
+		/*
+		 * We've now handled possible failure and configuration change. Try to
+		 * read from the chosen source.
 		 */
 		lastSourceFailed = false;
+		pendingWalRcvRestart = false;
 
 		switch (currentSource)
 		{
@@ -12096,6 +12116,45 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..62492daf6b 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -142,12 +142,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..5ffe593517 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3558,7 +3558,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3569,7 +3569,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#43Fujii Masao
masao.fujii@gmail.com
In reply to: Sergei Kornilov (#42)
Re: allow online change primary_conninfo

On Wed, Aug 28, 2019 at 6:50 PM Sergei Kornilov <sk@zsrv.org> wrote:

Hello

Updated patch attached. (also I merged into one file)

Thanks for updating the patch! Here are some comments from me.

#primary_conninfo = '' # connection string to sending server
# (change requires restart)
#primary_slot_name = '' # replication slot on sending server
# (change requires restart)

ISTM that you need to update the above parts in postgresql.conf.sample.

/* we don't expect primary_conninfo to change */

ISTM that you need to update the above comment in walreceiver.c.

+         <para>
+          The WAL receiver is restarted after an update of
<varname>primary_conninfo</varname>.
+         </para>

If primary_conninfo is set to an empty string, walreceiver just shuts down,
not restarts. The above description in the doc is not always true.
So I'm thinking something like the following description is better.
Thought?

If primary_conninfo is changed while WAL receiver is running,
the WAL receiver shuts down and then restarts with new setting,
except when primary_conninfo is an empty string.

There is the case where walreceiver doesn't shut down immediately
after the change of primary_conninfo. If the change happens while
the startup process in paused state (e.g., by pg_wal_replay_pause(),
recovery_min_apply_delay, recovery conflict, etc), the startup
process tries to terminate walreceiver after it gets out of such state.
Shouldn't this fact be documented as a note?

Regards,

--
Fujii Masao

In reply to: Fujii Masao (#43)
1 attachment(s)
Re: allow online change primary_conninfo

Hello

Thank you for review!

ISTM that you need to update the above parts in postgresql.conf.sample.

Good catch, I forgot about conf sample.

ISTM that you need to update the above comment in walreceiver.c.

Changed

If primary_conninfo is set to an empty string, walreceiver just shuts down,
not restarts. The above description in the doc is not always true.
So I'm thinking something like the following description is better.
Thought?

    If primary_conninfo is changed while WAL receiver is running,
    the WAL receiver shuts down and then restarts with new setting,
    except when primary_conninfo is an empty string.

Ok, changed.
I leave primary_slot_name description as before.

There is the case where walreceiver doesn't shut down immediately
after the change of primary_conninfo. If the change happens while
the startup process in paused state (e.g., by pg_wal_replay_pause(),
recovery_min_apply_delay, recovery conflict, etc), the startup
process tries to terminate walreceiver after it gets out of such state.
Shouldn't this fact be documented as a note?

Hmm. Is somewhere documented that walreceiver will receive WAL while the startup process in paused state?
(didn't add such note in current version)

regards, Sergei

Attachments:

v4_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=v4_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6612f95f9f..1fa48058e8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3929,9 +3929,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3946,9 +3952,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b7ff004234..743cae079e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11787,48 +11793,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
@@ -11928,10 +11892,66 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * Request walreceiver to start if we switch from another source or if
+		 * we need to change walreceiver connection configuration.
+		 */
+		if (currentSource == XLOG_FROM_STREAM && (lastSourceFailed || pendingWalRcvRestart))
+		{
+			/*
+			 * Ensure walreceiver is not running
+			 */
+			if (WalRcvRunning())
+				ShutdownWalRcv();
+
+			/*
+			 * If primary_conninfo is set, launch walreceiver to try to stream
+			 * the missing WAL.
+			 *
+			 * If fetching_ckpt is true, RecPtr points to the initial
+			 * checkpoint location. In that case, we use RedoStartLSN as the
+			 * streaming start position instead of RecPtr, so that when we
+			 * later jump backwards to start redo at RedoStartLSN, we will
+			 * have the logs streamed already.
+			 */
+			if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+			{
+				XLogRecPtr	ptr;
+				TimeLineID	tli;
+
+				if (fetching_ckpt)
+				{
+					ptr = RedoStartLSN;
+					tli = ControlFile->checkPointCopy.ThisTimeLineID;
+				}
+				else
+				{
+					ptr = RecPtr;
+
+					/*
+					 * Use the record begin position to determine the TLI,
+					 * rather than the position we're reading.
+					 */
+					tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+					if (curFileTLI > 0 && tli < curFileTLI)
+						elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+							 (uint32) (tliRecPtr >> 32),
+							 (uint32) tliRecPtr,
+							 tli, curFileTLI);
+				}
+				curFileTLI = tli;
+				RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+									 PrimarySlotName);
+				receivedUpto = 0;
+			}
+		}
+
+		/*
+		 * We've now handled possible failure and configuration change. Try to
+		 * read from the chosen source.
 		 */
 		lastSourceFailed = false;
+		pendingWalRcvRestart = false;
 
 		switch (currentSource)
 		{
@@ -12096,6 +12116,45 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..62492daf6b 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -142,12 +142,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6abc780778..59834b9df8 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -644,7 +644,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..5ffe593517 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3558,7 +3558,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3569,7 +3569,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0fc23e3a61..c25e406b9e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -308,9 +308,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#45Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Sergei Kornilov (#42)
1 attachment(s)
Re: allow online change primary_conninfo

Hello. I looked this and have some comments.

At Wed, 28 Aug 2019 12:49:46 +0300, Sergei Kornilov <sk@zsrv.org> wrote in <34084371566985786@sas1-0a6c2e2b59d7.qloud-c.yandex.net>
sk> Hello
sk>
sk> Updated patch attached. (also I merged into one file)
sk>
sk> > + <para>
sk> > + WAL receiver will be restarted after <varname>primary_slot_name</varname>
sk> > + was changed.
sk> >           </para>
sk> > The sentence sounds strange. Here is a suggestion:
sk> > The WAL receiver is restarted after an update of primary_slot_name (or
sk> > primary_conninfo).
sk>
sk> Changed.

-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.

I'm not sure it's good to change the context of this
description. This was mentioning that changing of this parameter
requires server (re)start. So if we want to be on the same
context after rewriting, it would be like "This parameter can be
set any time and causes WAL receiver restart with the new setting
if the server is in standby mode."

And If I'm not missing something, I don't find an (explict)
paramter of postmaster for setting primary_conninfo.

sk> > The comment at the top of the call of ProcessStartupSigHup() in
sk> > HandleStartupProcInterrupts() needs to be updated as it mentions a
sk> > configuration file re-read, but that's not the case anymore.
sk>
sk> Changed to "Process any requests or signals received recently." like in other places, e.g. syslogger
sk>
sk> > pendingRestartSource's name does not represent what it does, as it is
sk> > only associated with the restart of a WAL receiver when in streaming
sk> > state, and that's a no-op for the archive mode and the local mode.
sk>
sk> I renamed to pendingWalRcvRestart and replaced switch with simple condition.
sk>
sk> > So when shutting down the WAL receiver after a parameter change, what
sk> > happens is that the startup process waits for retrieve_retry_interval
sk> > before moving back to the archive mode. Only after scanning again the
sk> > archives do we restart a new WAL receiver.
sk>
sk> As I answered earlier, here is no switch to archive or wal_retrieve_retry_interval waiting in my patch. I recheck on current revision too:

@@ -11787,48 +11793,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
if (!StandbyMode)
return false;

- /*
- * If primary_conninfo is set, launch walreceiver to try
- * to stream the missing WAL.
- *
- * If fetching_ckpt is true, RecPtr points to the initial
- * checkpoint location. In that case, we use RedoStartLSN

Mentioning code, the movement of the code block makes the
surrounding swtich almost useless and the structure and the
result looks somewhat strange.

Couldn't we do the same thing by just skipping the wait and
setting lastSourceFailed to true in the case of intentional
walreceiver restart?

The attached is an incomplete (fraction) patch to sketch what is
in my mind. With something like this and making
ProcessStartupSigHup kill walreceiver would work.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

immediately_restart_walreceiver_INCOMPLETE.patchtext/x-patch; charset=us-asciiDownload
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6876537b62..61b235f8f7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11881,10 +11881,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 * obtaining the requested WAL. We're going to loop back
 					 * and retry from the archive, but if it hasn't been long
 					 * since last attempt, sleep wal_retrieve_retry_interval
-					 * milliseconds to avoid busy-waiting.
+					 * milliseconds to avoid busy-waiting. We don't wait if
+					 * explicitly requested to restart.
 					 */
 					now = GetCurrentTimestamp();
-					if (!TimestampDifferenceExceeds(last_fail_time, now,
+					if (!pendingWalRcvRestart &&
+						!TimestampDifferenceExceeds(last_fail_time, now,
 													wal_retrieve_retry_interval))
 					{
 						long		secs,
@@ -11905,6 +11907,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					}
 					last_fail_time = now;
 					currentSource = XLOG_FROM_ARCHIVE;
+
+					/*
+					 * If wal receiver is requested to restart, we skip the
+					 * next XLOG_FROM_ARCHIVE to immediately starting it.
+					 */
+					if (pendingWalRcvRestart)
+					{
+						lastSourceFailed = true;
+						continue;
+					}
+
 					break;
 
 				default:
In reply to: Kyotaro Horiguchi (#45)
1 attachment(s)
Re: allow online change primary_conninfo

Hello

Thank you for review!

- This parameter can only be set at server start.
+ This parameter can only be set in the <filename>postgresql.conf</filename>
+ file or on the server command line.

I'm not sure it's good to change the context of this
description. This was mentioning that changing of this parameter
requires server (re)start. So if we want to be on the same
context after rewriting, it would be like "This parameter can be
set any time and causes WAL receiver restart with the new setting
if the server is in standby mode."

Was written such way after this review: /messages/by-id/20181125214313.lydvmrraqjfrb3s2@alap3.anarazel.de

And If I'm not missing something, I don't find an (explict)
paramter of postmaster for setting primary_conninfo.

Well, we have common -c option: -c name=value

Couldn't we do the same thing by just skipping the wait and
setting lastSourceFailed to true in the case of intentional
walreceiver restart?

Yes, it's possible. Let's see... Done in attached variant.
We need check pendingWalRcvRestart before rescanLatestTimeLine lines.

regards, Sergei

Attachments:

v5_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=v5_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6612f95f9f..1fa48058e8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3929,9 +3929,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3946,9 +3952,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b7ff004234..f0e47cc663 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11756,12 +11762,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 		int			oldSource = currentSource;
 
 		/*
-		 * First check if we failed to read from the current source, and
+		 * First check if we failed to read from the current source or if
+		 * we want to restart wal receiver, and
 		 * advance the state machine if so. The failure to read might've
 		 * happened outside this function, e.g when a CRC check fails on a
 		 * record, or within this loop.
 		 */
-		if (lastSourceFailed)
+		if (lastSourceFailed || pendingWalRcvRestart)
 		{
 			switch (currentSource)
 			{
@@ -11862,6 +11869,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (WalRcvStreaming())
 						ShutdownWalRcv();
 
+					/*
+					 * If wal receiver is requested to restart, we skip the
+					 * next XLOG_FROM_ARCHIVE to immediately starting it.
+					 */
+					if (pendingWalRcvRestart)
+					{
+						lastSourceFailed = true;
+						currentSource = XLOG_FROM_ARCHIVE;
+						continue;
+					}
+
 					/*
 					 * Before we sleep, re-scan for possible new timelines if
 					 * we were requested to recover to the latest timeline.
@@ -11881,7 +11899,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 * obtaining the requested WAL. We're going to loop back
 					 * and retry from the archive, but if it hasn't been long
 					 * since last attempt, sleep wal_retrieve_retry_interval
-					 * milliseconds to avoid busy-waiting.
+					 * milliseconds to avoid busy-waiting. We don't wait if
+					 * explicitly requested to restart.
 					 */
 					now = GetCurrentTimestamp();
 					if (!TimestampDifferenceExceeds(last_fail_time, now,
@@ -11922,16 +11941,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				currentSource = XLOG_FROM_ARCHIVE;
 		}
 
-		if (currentSource != oldSource)
+		if (currentSource != oldSource && !pendingWalRcvRestart)
 			elog(DEBUG2, "switched WAL source from %s to %s after %s",
 				 xlogSourceNames[oldSource], xlogSourceNames[currentSource],
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * We've now handled possible failure and configuration change. Try to
+		 * read from the chosen source.
 		 */
 		lastSourceFailed = false;
+		pendingWalRcvRestart = false;
 
 		switch (currentSource)
 		{
@@ -12096,6 +12116,45 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+
+	pfree(conninfo);
+	pfree(slotname);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..62492daf6b 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -142,12 +142,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6abc780778..59834b9df8 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -644,7 +644,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..5ffe593517 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3558,7 +3558,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3569,7 +3569,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0fc23e3a61..c25e406b9e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -308,9 +308,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#47Andres Freund
andres@anarazel.de
In reply to: Sergei Kornilov (#46)
Re: allow online change primary_conninfo

Hi,

On 2019-09-19 17:46:06 +0300, Sergei Kornilov wrote:

<para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
This setting has no effect if the server is not in standby mode.
</para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>

From the sentence structure it's not clear that "except when
primary_conninfo is an empty string" only applies to "and then restarts
with new setting".

+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;

s/due the/due to a/ (or even "due to the").

@@ -11862,6 +11869,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
if (WalRcvStreaming())
ShutdownWalRcv();

+					/*
+					 * If wal receiver is requested to restart, we skip the
+					 * next XLOG_FROM_ARCHIVE to immediately starting it.
+					 */
+					if (pendingWalRcvRestart)
+					{
+						lastSourceFailed = true;
+						currentSource = XLOG_FROM_ARCHIVE;
+						continue;
+					}

I can't parse that comment. What does "skipping to starting" mean? I
assume it's just about avoiding wal_retrieve_retry_interval, but I think
the comment ought to be rephrased.

Also, should we really check this before scanning for new timelines?

Why is it the right thing to change to XLOG_FROM_ARCHIVE when we're just
restarting walreceiver? The server might unnecessarily get stuck in
archive based recovery for a long time this way? It seems to me that
we'd need to actually trigger RequestXLogStreaming() in this case.

Greetings,

Andres Freund

In reply to: Andres Freund (#47)
Re: allow online change primary_conninfo

Hello

Thank you for review! Can you please also check v4 version? v5 implements design suggested by Kyotaro Horiguchi-san, while v4 has another design. Which one do you prefer? Or are both wrong?

I can't parse that comment. What does "skipping to starting" mean? I
assume it's just about avoiding wal_retrieve_retry_interval, but I think
the comment ought to be rephrased.

Design idea is to rewrite current state from working XLOG_FROM_STREAM to failed XLOG_FROM_ARCHIVE (without actually try this method on this iteration) and immediately go to next iteration to advance the state machine. Next iteration after failed archive recovery is walreceiver. So walreceiver will be stopped just before this lines and started on next iteration. Walreceiver will be restarted, we do not call restore_command

Also, should we really check this before scanning for new timelines?

Hmm, on the next day... No, this is not really necessary.

Why is it the right thing to change to XLOG_FROM_ARCHIVE when we're just
restarting walreceiver? The server might unnecessarily get stuck in
archive based recovery for a long time this way? It seems to me that
we'd need to actually trigger RequestXLogStreaming() in this case.

I hope I clarified this in design idea description above.

Thank you!

regards, Sergei

#49Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Sergei Kornilov (#48)
1 attachment(s)
Re: allow online change primary_conninfo

Hello.

At Sat, 21 Sep 2019 13:06:25 +0300, Sergei Kornilov <sk@zsrv.org> wrote in <41171569060385@myt5-b646bde4b8f3.qloud-c.yandex.net>

Hello

Thank you for review! Can you please also check v4 version? v5 implements design suggested by Kyotaro Horiguchi-san, while v4 has another design. Which one do you prefer? Or are both wrong?

I can't parse that comment. What does "skipping to starting" mean? I
assume it's just about avoiding wal_retrieve_retry_interval, but I think
the comment ought to be rephrased.

Design idea is to rewrite current state from working XLOG_FROM_STREAM to failed XLOG_FROM_ARCHIVE (without actually try this method on this iteration) and immediately go to next iteration to advance the state machine. Next iteration after failed archive recovery is walreceiver. So walreceiver will be stopped just before this lines and started on next iteration. Walreceiver will be restarted, we do not call restore_command

Sorry, it's my bad. It meant "If wal receiver is requested to
restart, change state to XLOG_FROM_STREAM immediately skipping
the next XLOG_FROM_ARCHIVE state.".

Also, should we really check this before scanning for new timelines?

Hmm, on the next day... No, this is not really necessary.

No problem when timeline is not changed when explicit restart of
wal receiver. But if it happened just after the standby received
new hisotry file, we suffer an extra restart of wal receiver. It
seems better that we check that in the case.

Why is it the right thing to change to XLOG_FROM_ARCHIVE when we're just
restarting walreceiver? The server might unnecessarily get stuck in
archive based recovery for a long time this way? It seems to me that
we'd need to actually trigger RequestXLogStreaming() in this case.

I hope I clarified this in design idea description above.

My suggestion was just to pretend that the next XLOG_FROM_STREAM
failed, but the outcome actually looks wierd as Andres commented.

I think that comes from the structure of the state machine. WAL
receiver is started not at the beginning of XLOG_FROM_STREAM
state but at the end of XLOG_FROM_ARCHIVE. So we need to switch
once to XLOG_FROM_ARCHIVE in order to start wal receiver.

So, I'd like to propose to move the stuff to the second switch().
(See the attached incomplete patch.) This is rather similar to
Sergei's previous proposal, but the structure of the state
machine is kept.

Some other comments are below.

In ProcessStartupSigHup, conninfo and slotname don't need to be
retained until the end of the function.

The log message in the function seems to be too detailed. On the
other hand, if we changed primary_conninfo to '' (stop) or vise
versa (start), the message (restart) looks strange.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

incomplete_xlog_c_change.patchtext/x-patch; charset=us-asciiDownload
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6c69eb6dd7..dba0042db6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11755,12 +11755,15 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		int			oldSource = currentSource;
+		bool		start_wal_receiver = false;
 
 		/*
-		 * First check if we failed to read from the current source, and
+		 * First check if we failed to read from the current source or if
+		 * we want to restart wal receiver, and
 		 * advance the state machine if so. The failure to read might've
 		 * happened outside this function, e.g when a CRC check fails on a
 		 * record, or within this loop.
+		 * Restart wal receiver if explicitly requested, too.
 		 */
 		if (lastSourceFailed)
 		{
@@ -11788,53 +11791,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
+					start_wal_receiver = true;
 					currentSource = XLOG_FROM_STREAM;
 					break;
 
@@ -11864,8 +11826,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						ShutdownWalRcv();
 
 					/*
-					 * Before we sleep, re-scan for possible new timelines if
-					 * we were requested to recover to the latest timeline.
+					 * Re-scan for possible new timelines if we were requested
+					 * to recover to the latest timeline.
 					 */
 					if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
 					{
@@ -11929,8 +11891,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 				 lastSourceFailed ? "failure" : "success");
 
 		/*
-		 * We've now handled possible failure. Try to read from the chosen
-		 * source.
+		 * We've now handled possible failure and configuration change. Try to
+		 * read from the chosen source.
 		 */
 		lastSourceFailed = false;
 
@@ -11969,9 +11931,71 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					bool		havedata;
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown wal receiver if restart is requested.
 					 */
-					if (!WalRcvStreaming())
+					if (!start_wal_receiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+						
+						start_wal_receiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (start_wal_receiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Else, check if WAL receiver is still active.
+					 */
+					else if (!WalRcvStreaming())
 					{
 						lastSourceFailed = true;
 						break;
In reply to: Kyotaro Horiguchi (#49)
1 attachment(s)
Re: allow online change primary_conninfo

Hello

So, I'd like to propose to move the stuff to the second switch().
(See the attached incomplete patch.) This is rather similar to
Sergei's previous proposal, but the structure of the state
machine is kept.

Very similar to my v4 proposal (also move RequestXLogStreaming call), but closer to currentSource reading. No objections from me, attached patch is changed this way.
I renamed start_wal_receiver to startWalReceiver - this style looks more consistent to near code.

+					/*
+					 * Else, check if WAL receiver is still active.
+					 */
+					else if (!WalRcvStreaming())

I think we still need wait WalRcvStreaming after RequestXLogStreaming call. So I remove else branch and leave separate condition.

In ProcessStartupSigHup, conninfo and slotname don't need to be
retained until the end of the function.

Agreed, I move pfree

The log message in the function seems to be too detailed. On the
other hand, if we changed primary_conninfo to '' (stop) or vise
versa (start), the message (restart) looks strange.

I have no strong opinion here. These messages was changed many times during this thread lifetime, can be changed anytime. I think this is not issue since we have no consensus about overall design.
Write detailed messages was proposed here: /messages/by-id/20190216151025.GJ2240@paquier.xyz

or vise versa (start)

I explicitly check currentSource and WalRcvRunning, so we have no such messages if user had no walreceiver before.

regards, Sergei

Attachments:

v6_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=v6_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0191ec84b1..587031dea8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3983,9 +3983,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4000,9 +4006,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2e3cc51006..8eee079eb2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11773,6 +11779,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		int			oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11806,54 +11813,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -11999,7 +11965,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown WAL receiver if restart is requested.
+					 */
+					if (!startWalReceiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12127,6 +12155,45 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..62492daf6b 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -142,12 +142,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index f54ae7690d..8698e07f8a 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -644,7 +644,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 31a5ef0474..432296714f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3558,7 +3558,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3569,7 +3569,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0fc23e3a61..c25e406b9e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -308,9 +308,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
#51Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Sergei Kornilov (#50)
Re: allow online change primary_conninfo

Hi,

I'm a bit confused by the status of this patch - it's marked as Waiting
on Author (since last week), but that status was set by Sergei Kornilov
who is also the patch author. And there have been no messages since the
beginning of 2019-11 CF.

So what's the current status of this patch?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

In reply to: Tomas Vondra (#51)
Re: allow online change primary_conninfo

Hello

"Waiting on Author" is the right status. I found logical conflict with recently added wal_receiver_create_temp_slot GUC and my tests are currently broken. Will fix it and post new version.

PS: also, I surprised why it's ok for wal_receiver_create_temp_slot to be PGC_SIGHUP and ignore change of this setting until walreceiver will reconnect by unrelated reason. I means walreceiver does nothing special on SIGHUP. In common case change of wal_receiver_create_temp_slot setting will have effect only during restart of walreceiver process. And therefore we will switch to archive recovery. But such design was strongly rejected for my patch year ago.

regards, Sergei

#53Michael Paquier
michael@paquier.xyz
In reply to: Sergei Kornilov (#52)
Re: allow online change primary_conninfo

On Tue, Jan 21, 2020 at 06:03:18PM +0300, Sergei Kornilov wrote:

PS: also, I surprised why it's ok for wal_receiver_create_temp_slot
to be PGC_SIGHUP and ignore change of this setting until walreceiver
will reconnect by unrelated reason. I means walreceiver does nothing
special on SIGHUP. In common case change of
wal_receiver_create_temp_slot setting will have effect only during
restart of walreceiver process. And therefore we will switch to
archive recovery. But such design was strongly rejected for my patch
year ago.

[ Looks at 3297308... ]
Yeah, you are right. I was not paying much attention but something
does not stick here. My understanding is that we should have the WAL
receiver receive the value it needs to use from the startup process
(aka via RequestXLogStreaming from xlog.c), and that we ought to make
this new parameter PGC_POSTMASTER instead of PGC_SIGHUP. HEAD is
inconsistent here.
--
Michael

In reply to: Michael Paquier (#53)
2 attachment(s)
Re: allow online change primary_conninfo

Hello

Yeah, you are right. I was not paying much attention but something
does not stick here. My understanding is that we should have the WAL
receiver receive the value it needs to use from the startup process
(aka via RequestXLogStreaming from xlog.c), and that we ought to make
this new parameter PGC_POSTMASTER instead of PGC_SIGHUP. HEAD is
inconsistent here.

Thank you!

I attached two patches:
- first changes wal_receiver_create_temp_slot to PGC_POSTMASTER and moved the logic to RequestXLogStreaming
- second is based on last published v6 version of main patch. It changes wal_receiver_create_temp_slot back to PGC_SIGHUP along with primary_conninfo and primary_slot_name and will restart walreceiver if need.

Regarding the main patch: we have several possibilities for moving RequestXLogStreaming call. We need to choose one.
And, of course, changes in the text...

regards, Sergei

Attachments:

0002_v7_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=0002_v7_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 14992a08d7..19f36a3318 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4000,9 +4000,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4017,9 +4023,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
@@ -4137,7 +4147,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
         The default is on.  The only reason to turn this off would be if the
         remote instance is currently out of available replication slots.  This
-        parameter can only be set at server start.
+        parameter can only be set in the <filename>postgresql.conf</filename>
+        file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 55e9294ae3..2861b9f22a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11818,6 +11824,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		int			oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11851,54 +11858,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12044,7 +12010,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown WAL receiver if restart is requested.
+					 */
+					if (!startWalReceiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12172,6 +12200,61 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured. We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = (tempSlot != wal_receiver_create_temp_slot);
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if ((conninfoChanged || slotnameChanged || tempSlotChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be shut down due to change of %s",
+							"primary_conninfo")));
+		else if (conninfoChanged && (slotnameChanged || tempSlotChanged))
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
+							"primary_conninfo",
+							slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo"
+							: (slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot"))));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index c2250d7d4e..c396fdae9a 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -101,12 +101,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 264b544194..f0651aade1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -666,7 +666,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c4b36eb2ad..30703266a1 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1994,7 +1994,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
@@ -3643,7 +3643,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3654,7 +3654,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 4454407b7c..3dae3e9f3c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
@@ -322,7 +320,6 @@
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
-					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 12362421d7..94009eb7be 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
 is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
0001_v7_move_temp_slot_logic_to_startup.patchtext/x-diff; name=0001_v7_move_temp_slot_logic_to_startup.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e07dc01e80..14992a08d7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4137,9 +4137,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
         The default is on.  The only reason to turn this off would be if the
         remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        parameter can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7f4f784c0e..55e9294ae3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -283,6 +283,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = true;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index a5e85d32f3..264b544194 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -73,7 +73,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -349,42 +348,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
-
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..a36bc83d2c 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -248,10 +248,16 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = wal_receiver_create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 9f179a9129..c4b36eb2ad 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1994,7 +1994,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e1048c0047..4454407b7c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -322,6 +322,7 @@
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc20..12362421d7 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..9d25a7f91b 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
In reply to: Sergei Kornilov (#54)
2 attachment(s)
Re: allow online change primary_conninfo

Hello
Small rebase due to merge conflict of the tests. No functional changes since v7.

PS: also it is end of current CF, I will mark patch entry as moved to the next CF.

Attachments:

0002_v8_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=0002_v8_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2e2af9e96e..7887391bbb 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4005,9 +4005,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4022,9 +4028,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
@@ -4142,7 +4152,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
         The default is on.  The only reason to turn this off would be if the
         remote instance is currently out of available replication slots.  This
-        parameter can only be set at server start.
+        parameter can only be set in the <filename>postgresql.conf</filename>
+        file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 408b9b489a..c24b93332e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0;	/* XLOG_FROM_* code */
 static XLogSource currentSource = 0;	/* XLOG_FROM_* code */
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11831,6 +11837,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		int			oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11864,54 +11871,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12057,7 +12023,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown WAL receiver if restart is requested.
+					 */
+					if (!startWalReceiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12185,6 +12213,61 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured. We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = (tempSlot != wal_receiver_create_temp_slot);
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if ((conninfoChanged || slotnameChanged || tempSlotChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be shut down due to change of %s",
+							"primary_conninfo")));
+		else if (conninfoChanged && (slotnameChanged || tempSlotChanged))
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
+							"primary_conninfo",
+							slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo"
+							: (slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot"))));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index c2250d7d4e..c396fdae9a 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -101,12 +101,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 264b544194..f0651aade1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -666,7 +666,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f20f572a62..8dd78fdd35 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1994,7 +1994,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
@@ -3643,7 +3643,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3654,7 +3654,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 4454407b7c..3dae3e9f3c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
@@ -322,7 +320,6 @@
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
-					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 12362421d7..94009eb7be 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
0001_v8_move_temp_slot_logic_to_startup.patchtext/x-diff; name=0001_v8_move_temp_slot_logic_to_startup.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c1128f89ec..2e2af9e96e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4142,9 +4142,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
         The default is on.  The only reason to turn this off would be if the
         remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        parameter can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3813eadfb4..408b9b489a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -283,6 +283,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = true;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index a5e85d32f3..264b544194 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -73,7 +73,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -349,42 +348,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
-
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..a36bc83d2c 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -248,10 +248,16 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = wal_receiver_create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a16fe8cd5b..f20f572a62 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1994,7 +1994,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e1048c0047..4454407b7c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -322,6 +322,7 @@
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc20..12362421d7 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..9d25a7f91b 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
#56Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Michael Paquier (#53)
Re: allow online change primary_conninfo

On 2020-Jan-22, Michael Paquier wrote:

On Tue, Jan 21, 2020 at 06:03:18PM +0300, Sergei Kornilov wrote:

PS: also, I surprised why it's ok for wal_receiver_create_temp_slot
to be PGC_SIGHUP and ignore change of this setting until walreceiver
will reconnect by unrelated reason. I means walreceiver does nothing
special on SIGHUP. In common case change of
wal_receiver_create_temp_slot setting will have effect only during
restart of walreceiver process. And therefore we will switch to
archive recovery. But such design was strongly rejected for my patch
year ago.

[ Looks at 3297308... ]
Yeah, you are right. I was not paying much attention but something
does not stick here. My understanding is that we should have the WAL
receiver receive the value it needs to use from the startup process
(aka via RequestXLogStreaming from xlog.c), and that we ought to make
this new parameter PGC_POSTMASTER instead of PGC_SIGHUP. HEAD is
inconsistent here.

I'm CCing Peter as committer of 329730827848.

What are the downsides of changing wal_receiver_create_temp_slot to
PGC_POSTMASTER? It seems pretty nasty to requires a full server
restart. Maybe we can signal all walreceivers at that point so that
they restart with the correct setting? That's much less problematic, I
would think.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#57Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#56)
Re: allow online change primary_conninfo

On 2020-Mar-13, Alvaro Herrera wrote:

What are the downsides of changing wal_receiver_create_temp_slot to
PGC_POSTMASTER? It seems pretty nasty to requires a full server
restart. Maybe we can signal all walreceivers at that point so that
they restart with the correct setting? That's much less problematic, I
would think.

... oh, that's exactly what 0002 does. So patch 0001 is only about
making a temporary change to the create_temp_slot to be consistent with
existing policy, before changing the policy.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#58Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#57)
Re: allow online change primary_conninfo

On Fri, Mar 13, 2020 at 11:17:38AM -0300, Alvaro Herrera wrote:

... oh, that's exactly what 0002 does. So patch 0001 is only about
making a temporary change to the create_temp_slot to be consistent with
existing policy, before changing the policy.

Yes. In my opinion, patch 0002 should not change the GUC mode of
wal_receiver_create_temp_slot as the discussion here is about
primary_conninfo, even if both may share some logic regarding WAL
receiver shutdown and its restart triggered by the startup process.

Patch 0001 has actually been presented on this thread first:
/messages/by-id/753391579708726@iva3-77ae5995f07f.qloud-c.yandex.net
And there is an independent patch registered in this CF:
https://commitfest.postgresql.org/27/2456/

Should we add patch 0001 as an open item for v13 as there is a risk of
forgetting this issue? I have created a wiki blank page a couple of
weeks back:
https://wiki.postgresql.org/wiki/PostgreSQL_13_Open_Items
--
Michael

In reply to: Michael Paquier (#58)
2 attachment(s)
Re: allow online change primary_conninfo

Hello

Sorry for late replies.

Yes. In my opinion, patch 0002 should not change the GUC mode of
wal_receiver_create_temp_slot as the discussion here is about
primary_conninfo, even if both may share some logic regarding WAL
receiver shutdown and its restart triggered by the startup process.

Ok, I removed related changes from main patch. Along with minor merge conflict.

Patch 0001 has actually been presented on this thread first:
/messages/by-id/753391579708726@iva3-77ae5995f07f.qloud-c.yandex.net
And there is an independent patch registered in this CF:
https://commitfest.postgresql.org/27/2456/

Yep, 0001 is separate patch. I will post copy of this patch here to make cfbot works. Main patch 0002 requires resetting of is_temp_slot in RequestXLogStreaming to works properly.

Should we add patch 0001 as an open item for v13 as there is a risk of
forgetting this issue?

I think yes.

Well, it seems better to move this patch to next commitfest?

regards, Sergei

Attachments:

0001-v3-change-wal_receiver_create_temp_slot.patchtext/x-diff; name=0001-v3-change-wal_receiver_create_temp_slot.patchDownload
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc20..12362421d7 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..cf3e43128c 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
@@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
 extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-								 const char *conninfo, const char *slotname);
+								 const char *conninfo, const char *slotname,
+								 bool create_temp_slot);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3813eadfb4..e0f3ed5c2a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -283,6 +283,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = false;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
@@ -11901,7 +11902,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						}
 						curFileTLI = tli;
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+											 PrimarySlotName, wal_receiver_create_temp_slot);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 2ab15c3cbb..ff45482faa 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -15,6 +15,12 @@
  * WalRcv->receivedUpto variable in shared memory, to inform the startup
  * process of how far it can proceed with XLOG replay.
  *
+ * WAL receivers cannot load directly GUC parameters used when establishing
+ * their connection to the primary, and rely on parameter values passed down
+ * by the startup process when WAL streaming is requested.  This applies
+ * to for example the replication slot creation and the connection string to
+ * use for the connection with the primary.
+ *
  * If the primary server ends streaming, but doesn't disconnect, walreceiver
  * goes into "waiting" mode, and waits for the startup process to give new
  * instructions. The startup process will treat that the same as
@@ -73,7 +79,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -348,42 +353,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
-
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..821680a857 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,19 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * "recptr" indicates the position where streaming should begin.  "conninfo"
+ * is a libpq connection string to use.  "slotname" is, optionally, the name
+ * of a replication slot to acquire.  "create_temp_slot" indicates to create
+ * a temporary slot when no "slotname" is given.
+ *
+ * WAL receivers do not directly load GUC parameters used for the connection
+ * to the primary, and rely on the values passed down by the caller of this
+ * routine instead.  Hence, the addition of any new parameters should happen
+ * through this code path.
  */
 void
 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+					 const char *slotname, bool create_temp_slot)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -248,10 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
+	/*
+	 * Use configured replication slot if present, and ignore the value
+	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
+	 * use create_temp_slot to determine whether this WAL receiver should
+	 * create a temporary slot by itself and use it, or not.
+	 */
 	if (slotname != NULL)
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8228e1f390..ec9cd3696c 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2029,11 +2029,11 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e1048c0047..1dd2a86567 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -321,7 +321,9 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
+					# is not set.
+					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c1128f89ec..683d87c491 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4140,11 +4140,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is on.  The only reason to turn this off would be if the
-        remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        The default is off.  This parameter can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
0002_v9_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=0002_v9_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8b742c83c5..f1d2a77043 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4025,9 +4025,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4042,9 +4048,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 483fedb218..e1f3e905bb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -808,6 +808,12 @@ static XLogSource readSource = XLOG_FROM_ANY;
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11881,6 +11887,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11914,54 +11921,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName, wal_receiver_create_temp_slot);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12113,7 +12079,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown WAL receiver if restart is requested.
+					 */
+					if (!startWalReceiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName, wal_receiver_create_temp_slot);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12241,6 +12269,50 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be shut down due to change of %s",
+							"primary_conninfo")));
+		else if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
+							"primary_conninfo", "primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index c2250d7d4e..c396fdae9a 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -101,12 +101,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 779d19f1c1..0fd9886458 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -671,7 +671,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 80b6801c82..8d9bdc0eaf 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3697,7 +3697,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3708,7 +3708,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f2e55d1bd3..31096725fd 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 12362421d7..94009eb7be 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
#60Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Sergei Kornilov (#59)
Re: allow online change primary_conninfo

On 2020-Mar-17, Sergei Kornilov wrote:

Well, it seems better to move this patch to next commitfest?

What? You want to make wal_receiver_create_temp_slot unchangeable and
default to off in pg13, and delay the patch that fixes those things to
pg14? That makes no sense to me. Please keep them both here so that we
can get things to a usable state.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

In reply to: Alvaro Herrera (#60)
3 attachment(s)
Re: allow online change primary_conninfo

Hello

 Well, it seems better to move this patch to next commitfest?

What? You want to make wal_receiver_create_temp_slot unchangeable and
default to off in pg13, and delay the patch that fixes those things to
pg14? That makes no sense to me.

I want to handle similar things in a similar way.
wal_receiver_create_temp_slot has good design? I will change my patch in same way in this case. But something like that was strongly rejected a year ago.

Please keep them both here so that we can get things to a usable state.

Yes, of course.

Here I attached 3 patches:
0001 is copy from https://commitfest.postgresql.org/27/2456/ It changes wal_receiver_create_temp_slot to PGC_POSTMASTER, changes the default value to off, and moves the logic to the startup process.
0002 changes primary_conninfo and primary_slot_name to be PGC_SIGHUP
0003 changes wal_receiver_create_temp_slot back to be PGC_SIGHUP. Michael Paquier asks to remove this from 0002, you ask to leave it in this thread. So, I made separate patch on top of 0002.

Thank you

regards, Sergei

Attachments:

0003_v9_reload_wal_receiver_create_temp_slot.patchtext/x-diff; name=0003_v9_reload_wal_receiver_create_temp_slot.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f1d2a77043..eb6b3fb876 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4170,7 +4170,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is off.  This parameter can only be set at server start.
+        The default is off.  This parameter can only be set in the 
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e1f3e905bb..47d69d92d2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12278,8 +12278,10 @@ ProcessStartupSigHup(void)
 {
 	char	   *conninfo = pstrdup(PrimaryConnInfo);
 	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
 	bool		conninfoChanged;
 	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
 
 	ProcessConfigFile(PGC_SIGHUP);
 
@@ -12289,10 +12291,17 @@ ProcessStartupSigHup(void)
 	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
 	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
 
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured. We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = (tempSlot != wal_receiver_create_temp_slot);
+
 	pfree(conninfo);
 	pfree(slotname);
 
-	if ((conninfoChanged || slotnameChanged) &&
+	if ((conninfoChanged || slotnameChanged || tempSlotChanged) &&
 		currentSource == XLOG_FROM_STREAM
 		&& WalRcvRunning())
 	{
@@ -12300,14 +12309,16 @@ ProcessStartupSigHup(void)
 			ereport(LOG,
 					(errmsg("The WAL receiver is going to be shut down due to change of %s",
 							"primary_conninfo")));
-		else if (conninfoChanged && slotnameChanged)
+		else if (conninfoChanged && (slotnameChanged || tempSlotChanged))
 			ereport(LOG,
 					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
-							"primary_conninfo", "primary_slot_name")));
+							"primary_conninfo",
+							slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot")));
 		else
 			ereport(LOG,
 					(errmsg("The WAL receiver is going to be restarted due to change of %s",
-							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+							conninfoChanged ? "primary_conninfo"
+							: (slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot"))));
 
 		pendingWalRcvRestart = true;
 	}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8d9bdc0eaf..a3650518c4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2030,7 +2030,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 31096725fd..f01e43b818 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -321,7 +321,6 @@
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
 					# is not set.
-					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
0002_v9_allow_reload_walreceiver_conninfo.patchtext/x-diff; name=0002_v9_allow_reload_walreceiver_conninfo.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8b742c83c5..f1d2a77043 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4025,9 +4025,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4042,9 +4048,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 483fedb218..e1f3e905bb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -808,6 +808,12 @@ static XLogSource readSource = XLOG_FROM_ANY;
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
 
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
 typedef struct XLogPageReadPrivate
 {
 	int			emode;
@@ -11881,6 +11887,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11914,54 +11921,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					if (!StandbyMode)
 						return false;
 
-					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName, wal_receiver_create_temp_slot);
-						receivedUpto = 0;
-					}
-
 					/*
 					 * Move to XLOG_FROM_STREAM state in either case. We'll
 					 * get immediate failure if we didn't launch walreceiver,
 					 * and move on to the next state.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12113,7 +12079,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * shutdown WAL receiver if restart is requested.
+					 */
+					if (!startWalReceiver && pendingWalRcvRestart)
+					{
+						if (WalRcvRunning())
+							ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName, wal_receiver_create_temp_slot);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12241,6 +12269,50 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	/*
+	 * We need restart walreceiver if replication settings was changed.
+	 */
+	conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+	slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if ((conninfoChanged || slotnameChanged) &&
+		currentSource == XLOG_FROM_STREAM
+		&& WalRcvRunning())
+	{
+		if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be shut down due to change of %s",
+							"primary_conninfo")));
+		else if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
+							"primary_conninfo", "primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index c2250d7d4e..c396fdae9a 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -101,12 +101,12 @@ void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		ProcessStartupSigHup();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 779d19f1c1..0fd9886458 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -671,7 +671,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 80b6801c82..8d9bdc0eaf 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3697,7 +3697,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3708,7 +3708,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f2e55d1bd3..31096725fd 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 12362421d7..94009eb7be 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
 
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
0001-v3-change-wal_receiver_create_temp_slot.patchtext/x-diff; name=0001-v3-change-wal_receiver_create_temp_slot.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 672bf6f1ee..8b742c83c5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4160,11 +4160,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is on.  The only reason to turn this off would be if the
-        remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        The default is off.  This parameter can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index de2d4ee582..483fedb218 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -284,6 +284,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = false;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
@@ -11951,7 +11952,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						}
 						curFileTLI = tli;
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+											 PrimarySlotName, wal_receiver_create_temp_slot);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 25e0333c9e..779d19f1c1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -15,6 +15,12 @@
  * WalRcv->receivedUpto variable in shared memory, to inform the startup
  * process of how far it can proceed with XLOG replay.
  *
+ * WAL receivers cannot load directly GUC parameters used when establishing
+ * their connection to the primary, and rely on parameter values passed down
+ * by the startup process when WAL streaming is requested.  This applies
+ * to for example the replication slot creation and the connection string to
+ * use for the connection with the primary.
+ *
  * If the primary server ends streaming, but doesn't disconnect, walreceiver
  * goes into "waiting" mode, and waits for the startup process to give new
  * instructions. The startup process will treat that the same as
@@ -74,7 +80,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -349,42 +354,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
-
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..21d1823607 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,19 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * "recptr" indicates the position where streaming should begin.  "conninfo"
+ * is a libpq connection string to use.  "slotname" is, optionally, the name
+ * of a replication slot to acquire.  "create_temp_slot" indicates to create
+ * a temporary slot when no "slotname" is given.
+ *
+ * WAL receivers do not directly load GUC parameters used for the connection
+ * to the primary, and rely on the values passed down by the caller of this
+ * routine instead.  Hence, the addition of any new parameters should happen
+ * through this code path.
  */
 void
 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+					 const char *slotname, bool create_temp_slot)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -248,10 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	/*
+	 * Use configured replication slot if present, and ignore the value
+	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
+	 * use create_temp_slot to determine whether this WAL receiver should
+	 * create a temporary slot by itself and use it, or not.
+	 */
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 68082315ac..80b6801c82 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2030,11 +2030,11 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index aa44f0c9bf..f2e55d1bd3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -321,7 +321,9 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
+					# is not set.
+					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 98b033fc20..12362421d7 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..cf3e43128c 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
@@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
 extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-								 const char *conninfo, const char *slotname);
+								 const char *conninfo, const char *slotname,
+								 bool create_temp_slot);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
#62Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Sergei Kornilov (#61)
Re: allow online change primary_conninfo

I think the startup sighup handler should be in startup.c, not xlog.c,
which has enough random code already. We can add an accessor in xlog.c
to let changing the walrcv status flag, to be called from the signal
handler.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#63Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#62)
3 attachment(s)
Re: allow online change primary_conninfo

On 2020-Mar-24, Alvaro Herrera wrote:

I think the startup sighup handler should be in startup.c, not xlog.c,
which has enough random code already. We can add an accessor in xlog.c
to let changing the walrcv status flag, to be called from the signal
handler.

... as in the attached version.

Sergei included LOG messages to indicate which setting has been changed.
I put these in "#if 0" for now, but I'm thinking to remove these
altogether; we already have LOG messages when a setting changes value,
per ProcessConfigFileInternal(); the log sequence looks like this, taken
from tmp_check/log/001_stream_rep_standby_2.log after running the tests:

2020-03-25 20:54:19.413 -03 [17493] LOG: received SIGHUP, reloading configuration files
2020-03-25 20:54:19.415 -03 [17493] LOG: parameter "primary_slot_name" changed to "standby_2"
2020-03-25 20:54:19.415 -03 [17493] LOG: parameter "wal_receiver_status_interval" changed to "1"
2020-03-25 20:54:19.422 -03 [17569] LOG: started streaming WAL from primary at 0/3000000 on timeline 1
2020-03-25 20:54:19.426 -03 [17494] LOG: wal receiver process shutdown requested
2020-03-25 20:54:19.426 -03 [17569] FATAL: terminating walreceiver process due to administrator command
2020-03-25 20:54:19.433 -03 [17572] LOG: started streaming WAL from primary at 0/3000000 on timeline 1

which looks sufficient. Maybe we can reword that new message, say "wal
receiver process shutdown forced by parameter change". Not sure if we
can or should adjust the FATAL line; probably not worth the trouble.

Thoughts welcome. I'm thinking on getting this applied shortly.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

v10-0001-Set-wal_receiver_create_temp_slot-PGC_POSTMASTER.patchtext/x-diff; charset=us-asciiDownload
From 0bd43454e2981d9551b12424902d537674d48e31 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 25 Mar 2020 20:48:44 -0300
Subject: [PATCH v10 1/3] Set wal_receiver_create_temp_slot PGC_POSTMASTER

and off by default
---
 doc/src/sgml/config.sgml                      |  6 +--
 src/backend/access/transam/xlog.c             |  3 +-
 src/backend/replication/walreceiver.c         | 46 +++++++------------
 src/backend/replication/walreceiverfuncs.c    | 28 +++++++++--
 src/backend/utils/misc/guc.c                  |  4 +-
 src/backend/utils/misc/postgresql.conf.sample |  4 +-
 src/include/access/xlog.h                     |  1 +
 src/include/replication/walreceiver.h         |  4 +-
 8 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 355b408b0a..b5de6c3237 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4163,11 +4163,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is on.  The only reason to turn this off would be if the
-        remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        The default is off.  This parameter can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7621fc05e2..19df1d5a80 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -290,6 +290,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = false;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
@@ -11975,7 +11976,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						}
 						curFileTLI = tli;
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+											 PrimarySlotName, wal_receiver_create_temp_slot);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 25e0333c9e..779d19f1c1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -15,6 +15,12 @@
  * WalRcv->receivedUpto variable in shared memory, to inform the startup
  * process of how far it can proceed with XLOG replay.
  *
+ * WAL receivers cannot load directly GUC parameters used when establishing
+ * their connection to the primary, and rely on parameter values passed down
+ * by the startup process when WAL streaming is requested.  This applies
+ * to for example the replication slot creation and the connection string to
+ * use for the connection with the primary.
+ *
  * If the primary server ends streaming, but doesn't disconnect, walreceiver
  * goes into "waiting" mode, and waits for the startup process to give new
  * instructions. The startup process will treat that the same as
@@ -74,7 +80,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -349,42 +354,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
-
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..21d1823607 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,19 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * "recptr" indicates the position where streaming should begin.  "conninfo"
+ * is a libpq connection string to use.  "slotname" is, optionally, the name
+ * of a replication slot to acquire.  "create_temp_slot" indicates to create
+ * a temporary slot when no "slotname" is given.
+ *
+ * WAL receivers do not directly load GUC parameters used for the connection
+ * to the primary, and rely on the values passed down by the caller of this
+ * routine instead.  Hence, the addition of any new parameters should happen
+ * through this code path.
  */
 void
 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+					 const char *slotname, bool create_temp_slot)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -248,10 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	/*
+	 * Use configured replication slot if present, and ignore the value
+	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
+	 * use create_temp_slot to determine whether this WAL receiver should
+	 * create a temporary slot by itself and use it, or not.
+	 */
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af876d1f01..2c3cbbaa68 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2050,11 +2050,11 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index aa44f0c9bf..f2e55d1bd3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -321,7 +321,9 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
+					# is not set.
+					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 331497bcfb..a4485efc00 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..cf3e43128c 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
@@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
 extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-								 const char *conninfo, const char *slotname);
+								 const char *conninfo, const char *slotname,
+								 bool create_temp_slot);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
-- 
2.20.1

v10-0002-Allow-changing-primary_conninfo-and-primary_slot.patchtext/x-diff; charset=us-asciiDownload
From 2aedb8e73c1b22c49763d09559f5489bbd2e07b7 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 25 Mar 2020 20:48:56 -0300
Subject: [PATCH v10 2/3] Allow changing primary_conninfo and primary_slot_name
 in SIGHUP

---
 doc/src/sgml/config.sgml                      |  16 +-
 src/backend/access/transam/xlog.c             | 144 ++++++++++++------
 src/backend/access/transam/xlogreader.c       |   6 +-
 src/backend/postmaster/startup.c              |  30 +++-
 src/backend/replication/walreceiver.c         |   6 +-
 src/backend/utils/misc/guc.c                  |   4 +-
 src/backend/utils/misc/postgresql.conf.sample |   2 -
 src/include/access/xlog.h                     |   2 +
 src/test/recovery/t/001_stream_rep.pl         |  24 ++-
 9 files changed, 173 insertions(+), 61 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b5de6c3237..4d17dafa7e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4028,9 +4028,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4045,9 +4051,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 19df1d5a80..fab6187120 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -816,9 +816,13 @@ static XLogSource readSource = XLOG_FROM_ANY;
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
  * next.
+ *
+ * pendingWalRcvRestart is set when a config change occurs that requires a
+ * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
  */
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
+static bool pendingWalRcvRestart = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11905,6 +11909,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11939,53 +11944,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						return false;
 
 					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName, wal_receiver_create_temp_slot);
-						receivedUpto = 0;
-					}
-
-					/*
-					 * Move to XLOG_FROM_STREAM state in either case. We'll
-					 * get immediate failure if we didn't launch walreceiver,
-					 * and move on to the next state.
+					 * Move to XLOG_FROM_STREAM state, and set to start a
+					 * walreceiver if necessary.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12137,7 +12100,70 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * First, shutdown walreceiver if its restart has been
+					 * requested -- but no point if we're already slated for
+					 * starting it.
+					 */
+					if (pendingWalRcvRestart && !startWalReceiver)
+					{
+						ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName, wal_receiver_create_temp_slot);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12265,6 +12291,32 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+StartupRequestWalReceiverRestart(void)
+{
+	if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("wal receiver process shutdown requested")));
+#if 0
+		if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0)
+			ereport(LOG,
+					(errmsg("shutting down walreceiver")
+							"primary_conninfo")));
+		else if (conninfoChanged && slotnameChanged)
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s and %s",
+							"primary_conninfo", "primary_slot_name")));
+		else
+			ereport(LOG,
+					(errmsg("The WAL receiver is going to be restarted due to change of %s",
+							conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+#endif
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 32f02256ed..f3fea5132f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -585,9 +585,9 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	/*
 	 * Data is not in our buffer.
 	 *
-	 * Every time we actually read the page, even if we looked at parts of it
-	 * before, we need to do verification as the read_page callback might now
-	 * be rereading data from a different source.
+	 * Every time we actually read the segment, even if we looked at parts of
+	 * it before, we need to do verification as the read_page callback might
+	 * now be rereading data from a different source.
 	 *
 	 * Whenever switching to a new WAL segment, we read the first page of the
 	 * file and validate its header, even if that's not where the target
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 8952676765..f35b5adf7d 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -96,17 +96,43 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
 	errno = save_errno;
 }
 
+/*
+ * Re-read the config file.
+ *
+ * If one of the critical walreceiver options has changed, flag xlog.c
+ * to restart it.
+ */
+static void
+StartupRereadConfig(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
+	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if (conninfoChanged || slotnameChanged)
+		StartupRequestWalReceiverRestart();
+}
+
 /* Handle various signals that might be sent to the startup process */
 void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		StartupRereadConfig();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 779d19f1c1..0fd9886458 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -671,7 +671,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * We don't need handle changes of primary_conninfo or
+			 * primary_slotname here. Startup process will shutdown running
+			 * walreceiver in this case.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2c3cbbaa68..2d149dfaa5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3717,7 +3717,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3728,7 +3728,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f2e55d1bd3..31096725fd 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index a4485efc00..997db214cc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,8 +319,10 @@ extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern void SetWalWriterSleeping(bool sleeping);
 
+extern void StartupRequestWalReceiverRestart(void);
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
-- 
2.20.1

v10-0003-Set-wal_receiver_create_temp_slot-as-PGC_SIGHUP.patchtext/x-diff; charset=us-asciiDownload
From f5f20625b1f297cac177917c6e9b1f110ffea707 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 25 Mar 2020 20:54:27 -0300
Subject: [PATCH v10 3/3] Set wal_receiver_create_temp_slot as PGC_SIGHUP

.. using the new infrastructure
---
 doc/src/sgml/config.sgml                      |  6 +++++-
 src/backend/postmaster/startup.c              | 10 +++++++++-
 src/backend/utils/misc/guc.c                  |  2 +-
 src/backend/utils/misc/postgresql.conf.sample |  1 -
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4d17dafa7e..c5160fe907 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4173,7 +4173,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is off.  This parameter can only be set at server start.
+        The default is off.  This parameter can only be set in the 
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index f35b5adf7d..fd9ac35dac 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -107,18 +107,26 @@ StartupRereadConfig(void)
 {
 	char	   *conninfo = pstrdup(PrimaryConnInfo);
 	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
 	bool		conninfoChanged;
 	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
 
 	ProcessConfigFile(PGC_SIGHUP);
 
 	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
 	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
 
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured.  We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
 	pfree(conninfo);
 	pfree(slotname);
 
-	if (conninfoChanged || slotnameChanged)
+	if (conninfoChanged || slotnameChanged || tempSlotChanged)
 		StartupRequestWalReceiverRestart();
 }
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2d149dfaa5..53665971f5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2050,7 +2050,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 31096725fd..f01e43b818 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -321,7 +321,6 @@
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
 					# is not set.
-					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
-- 
2.20.1

#64Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#63)
Re: allow online change primary_conninfo

On Wed, Mar 25, 2020 at 09:08:12PM -0300, Alvaro Herrera wrote:

... as in the attached version.

Patch 0001 is one that has already been discussed previously (thanks
for keeping the extra comments about GUCs and WAL receivers). That
looks fine to me.

Sergei included LOG messages to indicate which setting has been changed.
I put these in "#if 0" for now, but I'm thinking to remove these
altogether; we already have LOG messages when a setting changes value,
per ProcessConfigFileInternal(); the log sequence looks like this, taken
from tmp_check/log/001_stream_rep_standby_2.log after running the tests:

Yes, it makes sense to remove any knowledge of GUC updates from
StartupRequestWalReceiverRestart(). I would add a description at the
top of this routine by the way.

+extern void ProcessStartupSigHup(void);
This is declared, but used nowhere in patch 0002.

Wouldn't it be better to be more careful about the NULL-ness of the
string parameters in StartupRereadConfig()?

+   if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+       tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
I would add parens here for clarity.

which looks sufficient. Maybe we can reword that new message, say "wal
receiver process shutdown forced by parameter change". Not sure if we
can or should adjust the FATAL line; probably not worth the trouble.

There is merit to keep the LOG in StartupRequestWalReceiverRestart()
unchanged, as the routine may get called in the future in some other
context.
--
Michael

In reply to: Alvaro Herrera (#63)
Re: allow online change primary_conninfo

Hello

Thank you! You were ahead of me. I wanted to double-check my changes this morning before posting.

Sergei included LOG messages to indicate which setting has been changed.
I put these in "#if 0" for now, but I'm thinking to remove these
altogether;

No objections.

Not sure if we can or should adjust the FATAL line; probably not worth the trouble.

I think it's ok. walreceiver is terminated exactly due to administrator command.

regards, Sergei

#66Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Sergei Kornilov (#65)
3 attachment(s)
Re: allow online change primary_conninfo

Now, would anyone be too upset if I push these in this other order? I
realized that the reason the tests broke after Sergei's patch is that
recovery/t/001_stream_rep.pl's get_slot_xmins() is broken for temp
walreceiver slots, since it's using the non-temp name it tries to give
to the slot, rather than the temp name under which it is actually
created. The workaround proposed by 0002 is to edit standby_1's config
to set walreceiver's slot to be non-temp.

Thanks to Justin Pryzby for offlist typo corrections.

The reason is that I think I would like to get Sergei's patch pushed
right away (0001+0002, as a single commit); but that I think there's
more to attack in the walreceiver temp slot code than 0003 here, and I
don't want to delay the new feature any longer while I figure out the
fix for that.

(The thing is: if I specify primary_slot_name in the config, why is the
temp walreceiver slot code not obeying that name? I think walreceiver
should create a temp slot, sure, but using the given name rather than
coming up with a random name.)

(The other reason is that I would like to push one patch to fix
walreceiver tmp slot rather than two, setting the thing first this way
and then the opposite way.)

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

v11-0001-Allow-changing-primary_conninfo-and-primary_slot.patchtext/x-diff; charset=us-asciiDownload
From f7a9717caa9dd73c00fb5e6a1dddb354ad78d09a Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 25 Mar 2020 20:48:56 -0300
Subject: [PATCH v11 1/3] Allow changing primary_conninfo and primary_slot_name
 in SIGHUP

---
 doc/src/sgml/config.sgml                      |  16 ++-
 src/backend/access/transam/xlog.c             | 130 +++++++++++-------
 src/backend/access/transam/xlogreader.c       |   6 +-
 src/backend/postmaster/startup.c              |  30 +++-
 src/backend/replication/walreceiver.c         |  12 +-
 src/backend/utils/misc/guc.c                  |   4 +-
 src/backend/utils/misc/postgresql.conf.sample |   2 -
 src/include/access/xlog.h                     |   2 +
 src/test/recovery/t/001_stream_rep.pl         |  24 +++-
 9 files changed, 165 insertions(+), 61 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 355b408b0a..1cf88e953d 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4028,9 +4028,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4045,9 +4051,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7621fc05e2..bbf8d4eee5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -815,9 +815,13 @@ static XLogSource readSource = XLOG_FROM_ANY;
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
  * next.
+ *
+ * pendingWalRcvRestart is set when a config change occurs that requires a
+ * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
  */
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
+static bool pendingWalRcvRestart = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11904,6 +11908,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11938,53 +11943,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						return false;
 
 					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
-					/*
-					 * Move to XLOG_FROM_STREAM state in either case. We'll
-					 * get immediate failure if we didn't launch walreceiver,
-					 * and move on to the next state.
+					 * Move to XLOG_FROM_STREAM state, and set to start a
+					 * walreceiver if necessary.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12136,7 +12099,70 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * First, shutdown walreceiver if its restart has been
+					 * requested -- but no point if we're already slated for
+					 * starting it.
+					 */
+					if (pendingWalRcvRestart && !startWalReceiver)
+					{
+						ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12264,6 +12290,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+StartupRequestWalReceiverRestart(void)
+{
+	if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("wal receiver process shutdown requested")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 32f02256ed..f3fea5132f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -585,9 +585,9 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	/*
 	 * Data is not in our buffer.
 	 *
-	 * Every time we actually read the page, even if we looked at parts of it
-	 * before, we need to do verification as the read_page callback might now
-	 * be rereading data from a different source.
+	 * Every time we actually read the segment, even if we looked at parts of
+	 * it before, we need to do verification as the read_page callback might
+	 * now be rereading data from a different source.
 	 *
 	 * Whenever switching to a new WAL segment, we read the first page of the
 	 * file and validate its header, even if that's not where the target
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 8952676765..f35b5adf7d 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -96,17 +96,43 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
 	errno = save_errno;
 }
 
+/*
+ * Re-read the config file.
+ *
+ * If one of the critical walreceiver options has changed, flag xlog.c
+ * to restart it.
+ */
+static void
+StartupRereadConfig(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
+	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if (conninfoChanged || slotnameChanged)
+		StartupRequestWalReceiverRestart();
+}
+
 /* Handle various signals that might be sent to the startup process */
 void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		StartupRereadConfig();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 25e0333c9e..b471a49d12 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -15,6 +15,12 @@
  * WalRcv->receivedUpto variable in shared memory, to inform the startup
  * process of how far it can proceed with XLOG replay.
  *
+ * A WAL receiver cannot directly load GUC parameters used when establishing
+ * its connection to the primary. Instead it relies on parameter values
+ * that are passed down by the startup process when streaming is requested.
+ * This applies, for example, to the replication slot and the connection
+ * string to be used for the connection with the primary.
+ *
  * If the primary server ends streaming, but doesn't disconnect, walreceiver
  * goes into "waiting" mode, and waits for the startup process to give new
  * instructions. The startup process will treat that the same as
@@ -685,7 +691,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * No need to handle changes in primary_conninfo or
+			 * primary_slotname here. Startup process will signal us in case
+			 * those change.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af876d1f01..c0b2b75407 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3717,7 +3717,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3728,7 +3728,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index aa44f0c9bf..c4f2f881c9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 331497bcfb..42c4df0262 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -318,8 +318,10 @@ extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern void SetWalWriterSleeping(bool sleeping);
 
+extern void StartupRequestWalReceiverRestart(void);
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
-- 
2.20.1

v11-0002-Fix-tests.patchtext/x-diff; charset=us-asciiDownload
From 14ac73a42247fdfc5d02a19ac1b9b13dc9efeccf Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Thu, 26 Mar 2020 21:04:42 -0300
Subject: [PATCH v11 2/3] Fix tests

---
 src/test/recovery/t/001_stream_rep.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 52585a1014..3408eaf551 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -22,6 +22,7 @@ $node_master->backup($backup_name);
 my $node_standby_1 = get_new_node('standby_1');
 $node_standby_1->init_from_backup($node_master, $backup_name,
 	has_streaming => 1);
+$node_standby_1->append_conf('postgresql.conf', "wal_receiver_create_temp_slot = false");
 $node_standby_1->start;
 
 # Take backup of standby 1 (not mandatory, but useful to check if
-- 
2.20.1

v11-0003-Rework-wal_receiver_create_temp_slot-using-new-i.patchtext/x-diff; charset=us-asciiDownload
From 29acdc47bf6a805c9ed24cb20d6a8a01a7fe93fc Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Wed, 25 Mar 2020 20:48:44 -0300
Subject: [PATCH v11 3/3] Rework wal_receiver_create_temp_slot using new
 infrastructure

---
 doc/src/sgml/config.sgml                      | 10 ++---
 src/backend/access/transam/xlog.c             |  3 +-
 src/backend/postmaster/startup.c              | 10 ++++-
 src/backend/replication/walreceiver.c         | 40 +++++--------------
 src/backend/replication/walreceiverfuncs.c    | 28 ++++++++++---
 src/backend/utils/misc/guc.c                  |  2 +-
 src/backend/utils/misc/postgresql.conf.sample |  3 +-
 src/include/access/xlog.h                     |  1 +
 src/include/replication/walreceiver.h         |  4 +-
 9 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1cf88e953d..c5160fe907 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4173,11 +4173,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is on.  The only reason to turn this off would be if the
-        remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        The default is off.  This parameter can only be set in the 
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bbf8d4eee5..2ecb319438 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -290,6 +290,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = false;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
@@ -12157,7 +12158,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						}
 						curFileTLI = tli;
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+											 PrimarySlotName, wal_receiver_create_temp_slot);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index f35b5adf7d..fd9ac35dac 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -107,18 +107,26 @@ StartupRereadConfig(void)
 {
 	char	   *conninfo = pstrdup(PrimaryConnInfo);
 	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
 	bool		conninfoChanged;
 	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
 
 	ProcessConfigFile(PGC_SIGHUP);
 
 	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
 	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
 
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured.  We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
 	pfree(conninfo);
 	pfree(slotname);
 
-	if (conninfoChanged || slotnameChanged)
+	if (conninfoChanged || slotnameChanged || tempSlotChanged)
 		StartupRequestWalReceiverRestart();
 }
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b471a49d12..0aa0c52c49 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -80,7 +80,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -355,42 +354,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
-
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..21d1823607 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,19 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * "recptr" indicates the position where streaming should begin.  "conninfo"
+ * is a libpq connection string to use.  "slotname" is, optionally, the name
+ * of a replication slot to acquire.  "create_temp_slot" indicates to create
+ * a temporary slot when no "slotname" is given.
+ *
+ * WAL receivers do not directly load GUC parameters used for the connection
+ * to the primary, and rely on the values passed down by the caller of this
+ * routine instead.  Hence, the addition of any new parameters should happen
+ * through this code path.
  */
 void
 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+					 const char *slotname, bool create_temp_slot)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -248,10 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	/*
+	 * Use configured replication slot if present, and ignore the value
+	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
+	 * use create_temp_slot to determine whether this WAL receiver should
+	 * create a temporary slot by itself and use it, or not.
+	 */
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
+		walrcv->is_temp_slot = create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c0b2b75407..53665971f5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2054,7 +2054,7 @@ static struct config_bool ConfigureNamesBool[] =
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index c4f2f881c9..f01e43b818 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -319,7 +319,8 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
+					# is not set.
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 42c4df0262..997db214cc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..cf3e43128c 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
@@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
 extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-								 const char *conninfo, const char *slotname);
+								 const char *conninfo, const char *slotname,
+								 bool create_temp_slot);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
-- 
2.20.1

#67Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#66)
Re: allow online change primary_conninfo

On Thu, Mar 26, 2020 at 09:39:17PM -0300, Alvaro Herrera wrote:

Now, would anyone be too upset if I push these in this other order? I
realized that the reason the tests broke after Sergei's patch is that
recovery/t/001_stream_rep.pl's get_slot_xmins() is broken for temp
walreceiver slots, since it's using the non-temp name it tries to give
to the slot, rather than the temp name under which it is actually
created. The workaround proposed by 0002 is to edit standby_1's config
to set walreceiver's slot to be non-temp.

FWIW, I find a bit strange that the change introduced in
001_stream_rep.pl as of patch 0002 is basically undone in 0003 by
changing the default value of wal_receiver_create_temp_slot.

The reason is that I think I would like to get Sergei's patch pushed
right away (0001+0002, as a single commit); but that I think there's
more to attack in the walreceiver temp slot code than 0003 here, and I
don't want to delay the new feature any longer while I figure out the
fix for that.

Not sure I agree with this approach. I'd rather fix all the existing
known problems first, and then introduce the new features on top of
what we consider to be a clean state. If we lack of time between the
first and second patches, we may have a risk of keeping the code with
the new feature but without the fixes discussed for
wal_receiver_create_temp_slot.

(The thing is: if I specify primary_slot_name in the config, why is the
temp walreceiver slot code not obeying that name? I think walreceiver
should create a temp slot, sure, but using the given name rather than
coming up with a random name.)

Good point. I am not sure either why the current logic has been
chosen. The discussion related to the original patch is in this area:
/messages/by-id/4792e456-d75f-8e6a-2d47-34b8f78c266f@2ndquadrant.com

(The other reason is that I would like to push one patch to fix
walreceiver tmp slot rather than two, setting the thing first this way
and then the opposite way.)

So your problem here is that by applying first 0003 and then 0001-0002
you would finish by switching wal_receiver_create_temp_slot to
PGC_POSTMASTER, and then back to PGC_SIGHUP again?
--
Michael

In reply to: Alvaro Herrera (#66)
Re: allow online change primary_conninfo

Hello

I realized that the reason the tests broke after Sergei's patch is that
recovery/t/001_stream_rep.pl's get_slot_xmins() is broken for temp
walreceiver slots, since it's using the non-temp name it tries to give
to the slot, rather than the temp name under which it is actually
created. The workaround proposed by 0002 is to edit standby_1's config
to set walreceiver's slot to be non-temp.

This is bug in behavior, not in tests.
We need walrcv->is_temp_slot = false; somewhere in RequestXLogStreaming to works correctly.

HEAD is not affected since primary_slot_name cannot be changed online.

(The thing is: if I specify primary_slot_name in the config, why is the
temp walreceiver slot code not obeying that name? I think walreceiver
should create a temp slot, sure, but using the given name rather than
coming up with a random name.)

Hm, interesting idea.

regards, Sergei

In reply to: Sergei Kornilov (#68)
3 attachment(s)
Re: allow online change primary_conninfo

Hello

In other words, patches in reverse order:
0001 will change primary_conninfo and primary_slot_name to reloadable
0002 will move wal_receiver_create_temp_slot logic to startup process (without changing to PGC_POSTMASTER)
0003 is new draft patch: wal_receiver_create_temp_slot will use the given name or generate new one.

regards, Sergei

Attachments:

v12-0003-draft-temp-slot-name.patchtext/x-diff; name=v12-0003-draft-temp-slot-name.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c5160fe907..d18921725c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4171,9 +4171,9 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       <listitem>
        <para>
         Specifies whether a WAL receiver should create a temporary replication
-        slot on the remote instance when no permanent replication slot to use
-        has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is off.  This parameter can only be set in the 
+        slot on the remote instance. The slot will be created with the name
+        configured by <xref linkend="guc-primary-slot-name"/> setting, otherwise
+        with the generated name. The default is off.  This parameter can only be set in the 
         <filename>postgresql.conf</filename> file or on the server command line.
        </para>
        <para>
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 0aa0c52c49..af9b9d586c 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -362,9 +362,10 @@ WalReceiverMain(void)
 		 */
 		if (is_temp_slot)
 		{
-			snprintf(slotname, sizeof(slotname),
-					 "pg_walreceiver_%lld",
-					 (long long int) walrcv_get_backend_pid(wrconn));
+			if (slotname[0] == '\0')
+				snprintf(slotname, sizeof(slotname),
+						"pg_walreceiver_%lld",
+						(long long int) walrcv_get_backend_pid(wrconn));
 
 			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 21d1823607..28117883e1 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -218,7 +218,7 @@ ShutdownWalRcv(void)
  * "recptr" indicates the position where streaming should begin.  "conninfo"
  * is a libpq connection string to use.  "slotname" is, optionally, the name
  * of a replication slot to acquire.  "create_temp_slot" indicates to create
- * a temporary slot when no "slotname" is given.
+ * a temporary slot.
  *
  * WAL receivers do not directly load GUC parameters used for the connection
  * to the primary, and rely on the values passed down by the caller of this
@@ -254,23 +254,17 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	/*
-	 * Use configured replication slot if present, and ignore the value
-	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
-	 * use create_temp_slot to determine whether this WAL receiver should
-	 * create a temporary slot by itself and use it, or not.
-	 */
 	if (slotname != NULL && slotname[0] != '\0')
 	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
-		walrcv->is_temp_slot = false;
 	}
 	else
 	{
 		walrcv->slotname[0] = '\0';
-		walrcv->is_temp_slot = create_temp_slot;
 	}
 
+	walrcv->is_temp_slot = create_temp_slot;
+
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
 		launch = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 53665971f5..51ef6987b5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2051,7 +2051,7 @@ static struct config_bool ConfigureNamesBool[] =
 
 	{
 		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
-			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
+			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot."),
 		},
 		&wal_receiver_create_temp_slot,
 		false,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f01e43b818..5308d602f4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -319,8 +319,7 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
-					# is not set.
+#wal_receiver_create_temp_slot = off	# Create temp replication slot
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
v12-0002-Rework-wal_receiver_create_temp_slot-using-new-i.patchtext/x-diff; name=v12-0002-Rework-wal_receiver_create_temp_slot-using-new-i.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1cf88e953d..c5160fe907 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4173,11 +4173,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is on.  The only reason to turn this off would be if the
-        remote instance is currently out of available replication slots.  This
-        parameter can only be set in the <filename>postgresql.conf</filename>
-        file or on the server command line.  Changes only take effect when the
-        WAL receiver process starts a new connection.
+        The default is off.  This parameter can only be set in the 
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bbf8d4eee5..2ecb319438 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -290,6 +290,7 @@ bool		StandbyModeRequested = false;
 char	   *PrimaryConnInfo = NULL;
 char	   *PrimarySlotName = NULL;
 char	   *PromoteTriggerFile = NULL;
+bool		wal_receiver_create_temp_slot = false;
 
 /* are we currently in standby mode? */
 bool		StandbyMode = false;
@@ -12157,7 +12158,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						}
 						curFileTLI = tli;
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
+											 PrimarySlotName, wal_receiver_create_temp_slot);
 						receivedUpto = 0;
 					}
 
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index f35b5adf7d..0dbf50934e 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -107,8 +107,10 @@ StartupRereadConfig(void)
 {
 	char	   *conninfo = pstrdup(PrimaryConnInfo);
 	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
 	bool		conninfoChanged;
 	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
 
 	ProcessConfigFile(PGC_SIGHUP);
 
@@ -118,7 +120,14 @@ StartupRereadConfig(void)
 	pfree(conninfo);
 	pfree(slotname);
 
-	if (conninfoChanged || slotnameChanged)
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured.  We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
+
+	if (conninfoChanged || slotnameChanged || tempSlotChanged)
 		StartupRequestWalReceiverRestart();
 }
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index d11c957b15..0aa0c52c49 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -15,6 +15,12 @@
  * WalRcv->receivedUpto variable in shared memory, to inform the startup
  * process of how far it can proceed with XLOG replay.
  *
+ * A WAL receiver cannot directly load GUC parameters used when establishing
+ * its connection to the primary. Instead it relies on parameter values
+ * that are passed down by the startup process when streaming is requested.
+ * This applies, for example, to the replication slot and the connection
+ * string to be used for the connection with the primary.
+ *
  * If the primary server ends streaming, but doesn't disconnect, walreceiver
  * goes into "waiting" mode, and waits for the startup process to give new
  * instructions. The startup process will treat that the same as
@@ -74,7 +80,6 @@
 
 
 /* GUC variables */
-bool		wal_receiver_create_temp_slot;
 int			wal_receiver_status_interval;
 int			wal_receiver_timeout;
 bool		hot_standby_feedback;
@@ -349,42 +354,23 @@ WalReceiverMain(void)
 		WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
 
 		/*
-		 * Create temporary replication slot if no slot name is configured or
-		 * the slot from the previous run was temporary, unless
-		 * wal_receiver_create_temp_slot is disabled.  We also need to handle
-		 * the case where the previous run used a temporary slot but
-		 * wal_receiver_create_temp_slot was changed in the meantime.  In that
-		 * case, we delete the old slot name in shared memory.  (This would
+		 * Create temporary replication slot if requested.  In that
+		 * case, we update slot name in shared memory.  (This would
 		 * all be a bit easier if we just didn't copy the slot name into
 		 * shared memory, since we won't need it again later, but then we
 		 * can't see the slot name in the stats views.)
 		 */
-		if (slotname[0] == '\0' || is_temp_slot)
+		if (is_temp_slot)
 		{
-			bool		changed = false;
+			snprintf(slotname, sizeof(slotname),
+					 "pg_walreceiver_%lld",
+					 (long long int) walrcv_get_backend_pid(wrconn));
 
-			if (wal_receiver_create_temp_slot)
-			{
-				snprintf(slotname, sizeof(slotname),
-						 "pg_walreceiver_%lld",
-						 (long long int) walrcv_get_backend_pid(wrconn));
+			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
-				walrcv_create_slot(wrconn, slotname, true, 0, NULL);
-				changed = true;
-			}
-			else if (slotname[0] != '\0')
-			{
-				slotname[0] = '\0';
-				changed = true;
-			}
-
-			if (changed)
-			{
-				SpinLockAcquire(&walrcv->mutex);
-				strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
-				walrcv->is_temp_slot = wal_receiver_create_temp_slot;
-				SpinLockRelease(&walrcv->mutex);
-			}
+			SpinLockAcquire(&walrcv->mutex);
+			strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
+			SpinLockRelease(&walrcv->mutex);
 		}
 
 		/*
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 6926a7b514..21d1823607 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -215,13 +215,19 @@ ShutdownWalRcv(void)
 /*
  * Request postmaster to start walreceiver.
  *
- * recptr indicates the position where streaming should begin, conninfo
- * is a libpq connection string to use, and slotname is, optionally, the name
- * of a replication slot to acquire.
+ * "recptr" indicates the position where streaming should begin.  "conninfo"
+ * is a libpq connection string to use.  "slotname" is, optionally, the name
+ * of a replication slot to acquire.  "create_temp_slot" indicates to create
+ * a temporary slot when no "slotname" is given.
+ *
+ * WAL receivers do not directly load GUC parameters used for the connection
+ * to the primary, and rely on the values passed down by the caller of this
+ * routine instead.  Hence, the addition of any new parameters should happen
+ * through this code path.
  */
 void
 RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
-					 const char *slotname)
+					 const char *slotname, bool create_temp_slot)
 {
 	WalRcvData *walrcv = WalRcv;
 	bool		launch = false;
@@ -248,15 +254,22 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	if (slotname != NULL)
+	/*
+	 * Use configured replication slot if present, and ignore the value
+	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
+	 * use create_temp_slot to determine whether this WAL receiver should
+	 * create a temporary slot by itself and use it, or not.
+	 */
+	if (slotname != NULL && slotname[0] != '\0')
+	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
+		walrcv->is_temp_slot = false;
+	}
 	else
+	{
 		walrcv->slotname[0] = '\0';
-
-	/*
-	 * walreceiver will handle temporary slot by itself
-	 */
-	walrcv->is_temp_slot = false;
+		walrcv->is_temp_slot = create_temp_slot;
+	}
 
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c0b2b75407..53665971f5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2054,7 +2054,7 @@ static struct config_bool ConfigureNamesBool[] =
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index c4f2f881c9..f01e43b818 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -319,7 +319,8 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = on	# create temp slot if primary_slot_name not set
+#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
+					# is not set.
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 42c4df0262..997db214cc 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -129,6 +129,7 @@ extern int	recoveryTargetAction;
 extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
+extern bool wal_receiver_create_temp_slot;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index e08afc6548..cf3e43128c 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -23,7 +23,6 @@
 #include "utils/tuplestore.h"
 
 /* user-settable parameters */
-extern bool wal_receiver_create_temp_slot;
 extern int	wal_receiver_status_interval;
 extern int	wal_receiver_timeout;
 extern bool hot_standby_feedback;
@@ -321,7 +320,8 @@ extern void ShutdownWalRcv(void);
 extern bool WalRcvStreaming(void);
 extern bool WalRcvRunning(void);
 extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
-								 const char *conninfo, const char *slotname);
+								 const char *conninfo, const char *slotname,
+								 bool create_temp_slot);
 extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
 extern int	GetReplicationApplyDelay(void);
 extern int	GetReplicationTransferLatency(void);
v12-0001-Allow-changing-primary_conninfo-and-primary_slot.patchtext/x-diff; name=v12-0001-Allow-changing-primary_conninfo-and-primary_slot.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 355b408b0a..1cf88e953d 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4028,9 +4028,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4045,9 +4051,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7621fc05e2..bbf8d4eee5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -815,9 +815,13 @@ static XLogSource readSource = XLOG_FROM_ANY;
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
  * next.
+ *
+ * pendingWalRcvRestart is set when a config change occurs that requires a
+ * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
  */
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
+static bool pendingWalRcvRestart = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11904,6 +11908,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11938,53 +11943,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						return false;
 
 					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName);
-						receivedUpto = 0;
-					}
-
-					/*
-					 * Move to XLOG_FROM_STREAM state in either case. We'll
-					 * get immediate failure if we didn't launch walreceiver,
-					 * and move on to the next state.
+					 * Move to XLOG_FROM_STREAM state, and set to start a
+					 * walreceiver if necessary.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12136,7 +12099,70 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * First, shutdown walreceiver if its restart has been
+					 * requested -- but no point if we're already slated for
+					 * starting it.
+					 */
+					if (pendingWalRcvRestart && !startWalReceiver)
+					{
+						ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12264,6 +12290,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+StartupRequestWalReceiverRestart(void)
+{
+	if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("wal receiver process shutdown requested")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 32f02256ed..f3fea5132f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -585,9 +585,9 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	/*
 	 * Data is not in our buffer.
 	 *
-	 * Every time we actually read the page, even if we looked at parts of it
-	 * before, we need to do verification as the read_page callback might now
-	 * be rereading data from a different source.
+	 * Every time we actually read the segment, even if we looked at parts of
+	 * it before, we need to do verification as the read_page callback might
+	 * now be rereading data from a different source.
 	 *
 	 * Whenever switching to a new WAL segment, we read the first page of the
 	 * file and validate its header, even if that's not where the target
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 8952676765..f35b5adf7d 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -96,17 +96,43 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
 	errno = save_errno;
 }
 
+/*
+ * Re-read the config file.
+ *
+ * If one of the critical walreceiver options has changed, flag xlog.c
+ * to restart it.
+ */
+static void
+StartupRereadConfig(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
+	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
+
+	pfree(conninfo);
+	pfree(slotname);
+
+	if (conninfoChanged || slotnameChanged)
+		StartupRequestWalReceiverRestart();
+}
+
 /* Handle various signals that might be sent to the startup process */
 void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		StartupRereadConfig();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 25e0333c9e..d11c957b15 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -685,7 +685,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * No need to handle changes in primary_conninfo or
+			 * primary_slotname here. Startup process will signal us in case
+			 * those change.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 89c903e45a..6926a7b514 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -253,6 +253,11 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->slotname[0] = '\0';
 
+	/*
+	 * walreceiver will handle temporary slot by itself
+	 */
+	walrcv->is_temp_slot = false;
+
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
 		launch = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af876d1f01..c0b2b75407 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3717,7 +3717,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3728,7 +3728,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index aa44f0c9bf..c4f2f881c9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 331497bcfb..42c4df0262 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -318,8 +318,10 @@ extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern void SetWalWriterSleeping(bool sleeping);
 
+extern void StartupRequestWalReceiverRestart(void);
 extern void XLogRequestWalReceiverReply(void);
 
+extern void ProcessStartupSigHup(void);
 extern void assign_max_wal_size(int newval, void *extra);
 extern void assign_checkpoint_completion_target(double newval, void *extra);
 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
#70Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Sergei Kornilov (#68)
Re: allow online change primary_conninfo

On 2020-Mar-27, Sergei Kornilov wrote:

Hello

I realized that the reason the tests broke after Sergei's patch is that
recovery/t/001_stream_rep.pl's get_slot_xmins() is broken for temp
walreceiver slots, since it's using the non-temp name it tries to give
to the slot, rather than the temp name under which it is actually
created. The workaround proposed by 0002 is to edit standby_1's config
to set walreceiver's slot to be non-temp.

This is bug in behavior, not in tests.
We need walrcv->is_temp_slot = false; somewhere in RequestXLogStreaming to works correctly.

HEAD is not affected since primary_slot_name cannot be changed online.

I pushed the wal_receiver_create_temp_slot bugfix, because I realized
after looking for long enough at WalReceiverMain() that the code was
beyond saving. I'll be pushing the rest of this later today.

Thanks,

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#71Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#70)
1 attachment(s)
Re: allow online change primary_conninfo

On 2020-Mar-27, Alvaro Herrera wrote:

I pushed the wal_receiver_create_temp_slot bugfix, because I realized
after looking for long enough at WalReceiverMain() that the code was
beyond saving. I'll be pushing the rest of this later today.

So here's the next one. I still have to go over the doc changes here,
but at least the tests are passing for me.

I think I should set aside your new draft for now, but I think we should
still get it in pg13 to avoid having the change the semantics of the
walreceiver temp slot in the next release.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

v13-0001-Allow-changing-primary_conninfo-and-primary_slot.patchtext/x-diff; charset=us-asciiDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b5de6c3237..c5160fe907 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4028,9 +4028,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           <varname>primary_conninfo</varname> string.
          </para>
          <para>
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if the server is not in standby mode.
          </para>
+         <para>
+          If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+          the WAL receiver shuts down and then restarts with new setting,
+          except when primary_conninfo is an empty string.
+         </para>
         </listitem>
        </varlistentry>
        <varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -4045,9 +4051,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
           connecting to the sending server via streaming replication to control
           resource removal on the upstream node
           (see <xref linkend="streaming-replication-slots"/>).
-          This parameter can only be set at server start.
+          This parameter can only be set in the <filename>postgresql.conf</filename>
+          file or on the server command line.
           This setting has no effect if <varname>primary_conninfo</varname> is not
-          set.
+          set or the server is not in standby mode.
+         </para>
+         <para>
+          The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
          </para>
         </listitem>
        </varlistentry>
@@ -4163,7 +4173,11 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
         Specifies whether a WAL receiver should create a temporary replication
         slot on the remote instance when no permanent replication slot to use
         has been configured (using <xref linkend="guc-primary-slot-name"/>).
-        The default is off.  This parameter can only be set at server start.
+        The default is off.  This parameter can only be set in the 
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        The WAL receiver is restarted after an update of <varname>wal_receiver_create_temp_slot</varname>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c7d93d3735..d3a299ba4c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -816,9 +816,13 @@ static XLogSource readSource = XLOG_FROM_ANY;
  * currently have a WAL file open. If lastSourceFailed is set, our last
  * attempt to read from currentSource failed, and we should try another source
  * next.
+ *
+ * pendingWalRcvRestart is set when a config change occurs that requires a
+ * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
  */
 static XLogSource currentSource = XLOG_FROM_ANY;
 static bool lastSourceFailed = false;
+static bool pendingWalRcvRestart = false;
 
 typedef struct XLogPageReadPrivate
 {
@@ -11905,6 +11909,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	for (;;)
 	{
 		XLogSource	oldSource = currentSource;
+		bool		startWalReceiver = false;
 
 		/*
 		 * First check if we failed to read from the current source, and
@@ -11939,54 +11944,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						return false;
 
 					/*
-					 * If primary_conninfo is set, launch walreceiver to try
-					 * to stream the missing WAL.
-					 *
-					 * If fetching_ckpt is true, RecPtr points to the initial
-					 * checkpoint location. In that case, we use RedoStartLSN
-					 * as the streaming start position instead of RecPtr, so
-					 * that when we later jump backwards to start redo at
-					 * RedoStartLSN, we will have the logs streamed already.
-					 */
-					if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-					{
-						XLogRecPtr	ptr;
-						TimeLineID	tli;
-
-						if (fetching_ckpt)
-						{
-							ptr = RedoStartLSN;
-							tli = ControlFile->checkPointCopy.ThisTimeLineID;
-						}
-						else
-						{
-							ptr = RecPtr;
-
-							/*
-							 * Use the record begin position to determine the
-							 * TLI, rather than the position we're reading.
-							 */
-							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-									 (uint32) (tliRecPtr >> 32),
-									 (uint32) tliRecPtr,
-									 tli, curFileTLI);
-						}
-						curFileTLI = tli;
-						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-											 PrimarySlotName,
-											 wal_receiver_create_temp_slot);
-						receivedUpto = 0;
-					}
-
-					/*
-					 * Move to XLOG_FROM_STREAM state in either case. We'll
-					 * get immediate failure if we didn't launch walreceiver,
-					 * and move on to the next state.
+					 * Move to XLOG_FROM_STREAM state, and set to start a
+					 * walreceiver if necessary.
 					 */
 					currentSource = XLOG_FROM_STREAM;
+					startWalReceiver = true;
 					break;
 
 				case XLOG_FROM_STREAM:
@@ -12138,7 +12100,71 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					Assert(StandbyMode);
 
 					/*
-					 * Check if WAL receiver is still active.
+					 * First, shutdown walreceiver if its restart has been
+					 * requested -- but no point if we're already slated for
+					 * starting it.
+					 */
+					if (pendingWalRcvRestart && !startWalReceiver)
+					{
+						ShutdownWalRcv();
+
+						/*
+						 * Re-scan for possible new timelines if we were
+						 * requested to recover to the latest timeline.
+						 */
+						if (recoveryTargetTimeLineGoal ==
+							RECOVERY_TARGET_TIMELINE_LATEST)
+							rescanLatestTimeLine();
+
+						startWalReceiver = true;
+					}
+					pendingWalRcvRestart = false;
+
+					/*
+					 * Launch walreceiver if needed.
+					 *
+					 * If fetching_ckpt is true, RecPtr points to the initial
+					 * checkpoint location. In that case, we use RedoStartLSN
+					 * as the streaming start position instead of RecPtr, so
+					 * that when we later jump backwards to start redo at
+					 * RedoStartLSN, we will have the logs streamed already.
+					 */
+					if (startWalReceiver &&
+						PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+					{
+						XLogRecPtr	ptr;
+						TimeLineID	tli;
+
+						if (fetching_ckpt)
+						{
+							ptr = RedoStartLSN;
+							tli = ControlFile->checkPointCopy.ThisTimeLineID;
+						}
+						else
+						{
+							ptr = RecPtr;
+
+							/*
+							 * Use the record begin position to determine the
+							 * TLI, rather than the position we're reading.
+							 */
+							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+							if (curFileTLI > 0 && tli < curFileTLI)
+								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+									 (uint32) (tliRecPtr >> 32),
+									 (uint32) tliRecPtr,
+									 tli, curFileTLI);
+						}
+						curFileTLI = tli;
+						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+											 PrimarySlotName,
+											 wal_receiver_create_temp_slot);
+						receivedUpto = 0;
+					}
+
+					/*
+					 * Check if WAL receiver is active or wait to start up.
 					 */
 					if (!WalRcvStreaming())
 					{
@@ -12266,6 +12292,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 	return false;				/* not reached */
 }
 
+void
+StartupRequestWalReceiverRestart(void)
+{
+	if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
+	{
+		ereport(LOG,
+				(errmsg("wal receiver process shutdown requested")));
+
+		pendingWalRcvRestart = true;
+	}
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 32f02256ed..f3fea5132f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -585,9 +585,9 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
 	/*
 	 * Data is not in our buffer.
 	 *
-	 * Every time we actually read the page, even if we looked at parts of it
-	 * before, we need to do verification as the read_page callback might now
-	 * be rereading data from a different source.
+	 * Every time we actually read the segment, even if we looked at parts of
+	 * it before, we need to do verification as the read_page callback might
+	 * now be rereading data from a different source.
 	 *
 	 * Whenever switching to a new WAL segment, we read the first page of the
 	 * file and validate its header, even if that's not where the target
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 8952676765..fd9ac35dac 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -96,17 +96,51 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
 	errno = save_errno;
 }
 
+/*
+ * Re-read the config file.
+ *
+ * If one of the critical walreceiver options has changed, flag xlog.c
+ * to restart it.
+ */
+static void
+StartupRereadConfig(void)
+{
+	char	   *conninfo = pstrdup(PrimaryConnInfo);
+	char	   *slotname = pstrdup(PrimarySlotName);
+	bool		tempSlot = wal_receiver_create_temp_slot;
+	bool		conninfoChanged;
+	bool		slotnameChanged;
+	bool		tempSlotChanged = false;
+
+	ProcessConfigFile(PGC_SIGHUP);
+
+	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
+	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
+
+	/*
+	 * wal_receiver_create_temp_slot is used only when we have no slot
+	 * configured.  We do not need to track this change if it has no effect.
+	 */
+	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
+		tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
+	pfree(conninfo);
+	pfree(slotname);
+
+	if (conninfoChanged || slotnameChanged || tempSlotChanged)
+		StartupRequestWalReceiverRestart();
+}
+
 /* Handle various signals that might be sent to the startup process */
 void
 HandleStartupProcInterrupts(void)
 {
 	/*
-	 * Check if we were requested to re-read config file.
+	 * Process any requests or signals received recently.
 	 */
 	if (got_SIGHUP)
 	{
 		got_SIGHUP = false;
-		ProcessConfigFile(PGC_SIGHUP);
+		StartupRereadConfig();
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index a1459ca7f6..fad20986db 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -679,7 +679,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 			   walrcv->walRcvState == WALRCV_STOPPING);
 		if (walrcv->walRcvState == WALRCV_RESTARTING)
 		{
-			/* we don't expect primary_conninfo to change */
+			/*
+			 * No need to handle changes in primary_conninfo or
+			 * primary_slotname here. Startup process will signal us in case
+			 * those change.
+			 */
 			*startpoint = walrcv->receiveStart;
 			*startpointTLI = walrcv->receiveStartTLI;
 			walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2c3cbbaa68..53665971f5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2050,7 +2050,7 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
-		{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
 		},
 		&wal_receiver_create_temp_slot,
@@ -3717,7 +3717,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the connection string to be used to connect to the sending server."),
 			NULL,
 			GUC_SUPERUSER_ONLY
@@ -3728,7 +3728,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+		{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Sets the name of the replication slot to use on the sending server."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f2e55d1bd3..f01e43b818 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -309,9 +309,7 @@
 # These settings are ignored on a master server.
 
 #primary_conninfo = ''			# connection string to sending server
-					# (change requires restart)
 #primary_slot_name = ''			# replication slot on sending server
-					# (change requires restart)
 #promote_trigger_file = ''		# file name whose presence ends recovery
 #hot_standby = on			# "off" disallows queries during recovery
 					# (change requires restart)
@@ -323,7 +321,6 @@
 					# -1 allows indefinite delay
 #wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
 					# is not set.
-					# (change requires restart)
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index a4485efc00..2b1b67d35c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,6 +319,7 @@ extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern void SetWalWriterSleeping(bool sleeping);
 
+extern void StartupRequestWalReceiverRestart(void);
 extern void XLogRequestWalReceiverReply(void);
 
 extern void assign_max_wal_size(int newval, void *extra);
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index d09ebe65a3..52585a1014 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 34;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
 	"primary_slot_name = $slotname_2");
 $node_standby_2->append_conf('postgresql.conf',
 	"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
 
 # Fetch xmin columns from slot's pg_replication_slots row, after waiting for
 # given boolean condition to be true to ensure we've reached a quiescent state
@@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
 is($catalog_xmin, '',
 	'catalog xmin of cascaded slot still null with hs_feedback reset');
 
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+	"primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+	$node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+	qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");
+
 # Test physical slot advancing and its durability.  Create a new slot on
 # the primary, not used by any of the standbys. This reserves WAL at creation.
 my $phys_slot = 'phys_slot';
In reply to: Alvaro Herrera (#71)
Re: allow online change primary_conninfo

Hello

Thank you!

I think I should set aside your new draft for now

I agree, this patch definitely needs a bit more time to review. (currently it applies on top of v13 patch cleanly)

but I think we should still get it in pg13 to avoid having the change the semantics of the
walreceiver temp slot in the next release.

Good point.

regards, Sergei

#73Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Alvaro Herrera (#71)
Re: allow online change primary_conninfo

On 2020-Mar-27, Alvaro Herrera wrote:

On 2020-Mar-27, Alvaro Herrera wrote:

I pushed the wal_receiver_create_temp_slot bugfix, because I realized
after looking for long enough at WalReceiverMain() that the code was
beyond saving. I'll be pushing the rest of this later today.

So here's the next one. I still have to go over the doc changes here,
but at least the tests are passing for me.

Pushed with some documentation tweaks.

Many thanks for working on this!

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

In reply to: Alvaro Herrera (#73)
1 attachment(s)
Re: allow online change primary_conninfo

Hello

Thank you very much!

I attached updated patch: walreceiver will use configured primary_slot_name as temporary slot name if wal_receiver_create_temp_slot is enabled.

regards, Sergei

Attachments:

v14_primary_slot_name_temporary.patchtext/x-diff; name=v14_primary_slot_name_temporary.patchDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2de21903a1..8983cb5f5e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4170,8 +4170,8 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       <listitem>
        <para>
         Specifies whether the WAL receiver process should create a temporary replication
-        slot on the remote instance when no permanent replication slot to use
-        has been configured (using <xref linkend="guc-primary-slot-name"/>).
+        slot on the remote instance. The slot name can be configured
+        (using <xref linkend="guc-primary-slot-name"/>), otherwise it will be generated.
         The default is off.  This parameter can only be set in the 
         <filename>postgresql.conf</filename> file or on the server command line.
         If this parameter is changed while the WAL receiver process is running,
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index fd9ac35dac..134600db7d 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -116,13 +116,8 @@ StartupRereadConfig(void)
 
 	conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
 	slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
+	tempSlotChanged = (tempSlot != wal_receiver_create_temp_slot);
 
-	/*
-	 * wal_receiver_create_temp_slot is used only when we have no slot
-	 * configured.  We do not need to track this change if it has no effect.
-	 */
-	if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
-		tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
 	pfree(conninfo);
 	pfree(slotname);
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 760e3c7ab0..303e739b5b 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -245,12 +245,6 @@ WalReceiverMain(void)
 	startpoint = walrcv->receiveStart;
 	startpointTLI = walrcv->receiveStartTLI;
 
-	/*
-	 * At most one of is_temp_slot and slotname can be set; otherwise,
-	 * RequestXLogStreaming messed up.
-	 */
-	Assert(!is_temp_slot || (slotname[0] == '\0'));
-
 	/* Initialise to a sanish value */
 	walrcv->lastMsgSendTime =
 		walrcv->lastMsgReceiptTime = walrcv->latestWalEndTime = now;
@@ -365,14 +359,14 @@ WalReceiverMain(void)
 
 		/*
 		 * Create temporary replication slot if requested, and update slot
-		 * name in shared memory.  (Note the slot name cannot already be set
-		 * in this case.)
+		 * name in shared memory.
 		 */
 		if (is_temp_slot)
 		{
-			snprintf(slotname, sizeof(slotname),
-					 "pg_walreceiver_%lld",
-					 (long long int) walrcv_get_backend_pid(wrconn));
+			if (slotname[0] == '\0')
+				snprintf(slotname, sizeof(slotname),
+						"pg_walreceiver_%lld",
+						(long long int) walrcv_get_backend_pid(wrconn));
 
 			walrcv_create_slot(wrconn, slotname, true, 0, NULL);
 
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 21d1823607..28117883e1 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -218,7 +218,7 @@ ShutdownWalRcv(void)
  * "recptr" indicates the position where streaming should begin.  "conninfo"
  * is a libpq connection string to use.  "slotname" is, optionally, the name
  * of a replication slot to acquire.  "create_temp_slot" indicates to create
- * a temporary slot when no "slotname" is given.
+ * a temporary slot.
  *
  * WAL receivers do not directly load GUC parameters used for the connection
  * to the primary, and rely on the values passed down by the caller of this
@@ -254,23 +254,17 @@ RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
 	else
 		walrcv->conninfo[0] = '\0';
 
-	/*
-	 * Use configured replication slot if present, and ignore the value
-	 * of create_temp_slot as the slot name should be persistent.  Otherwise,
-	 * use create_temp_slot to determine whether this WAL receiver should
-	 * create a temporary slot by itself and use it, or not.
-	 */
 	if (slotname != NULL && slotname[0] != '\0')
 	{
 		strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
-		walrcv->is_temp_slot = false;
 	}
 	else
 	{
 		walrcv->slotname[0] = '\0';
-		walrcv->is_temp_slot = create_temp_slot;
 	}
 
+	walrcv->is_temp_slot = create_temp_slot;
+
 	if (walrcv->walRcvState == WALRCV_STOPPED)
 	{
 		launch = true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 79bc7ac8ca..e702e3eddc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2051,7 +2051,7 @@ static struct config_bool ConfigureNamesBool[] =
 
 	{
 		{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
-			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
+			gettext_noop("Sets whether a WAL receiver should create a temporary replication slot."),
 		},
 		&wal_receiver_create_temp_slot,
 		false,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e9f8ca775d..e09be86940 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -319,8 +319,7 @@
 #max_standby_streaming_delay = 30s	# max delay before canceling queries
 					# when reading streaming WAL;
 					# -1 allows indefinite delay
-#wal_receiver_create_temp_slot = off	# Create temp slot if primary_slot_name
-					# is not set.
+#wal_receiver_create_temp_slot = off	# Create temporary replication slot
 #wal_receiver_status_interval = 10s	# send replies at least this often
 					# 0 disables
 #hot_standby_feedback = off		# send info from standby to prevent
#75Peter Eisentraut
peter.eisentraut@2ndquadrant.com
In reply to: Sergei Kornilov (#74)
Re: allow online change primary_conninfo

On 2020-03-28 11:49, Sergei Kornilov wrote:

I attached updated patch: walreceiver will use configured primary_slot_name as temporary slot name if wal_receiver_create_temp_slot is enabled.

The original setup worked consistently with pg_basebackup. In
pg_basebackup, if you specify -S/--slot, then it uses a permanent slot
with the given name. This is the same behavior as primary_slot_name has
now. We should be careful that we don't create two sets of
options/settings that look similar but combine in different ways.

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#76Maxim Orlov
m.orlov@postgrespro.ru
In reply to: Alvaro Herrera (#73)
1 attachment(s)
Re: allow online change primary_conninfo

On 2020-03-28 02:39, Alvaro Herrera wrote:

On 2020-Mar-27, Alvaro Herrera wrote:

On 2020-Mar-27, Alvaro Herrera wrote:

I pushed the wal_receiver_create_temp_slot bugfix, because I realized
after looking for long enough at WalReceiverMain() that the code was
beyond saving. I'll be pushing the rest of this later today.

So here's the next one. I still have to go over the doc changes here,
but at least the tests are passing for me.

Pushed with some documentation tweaks.

Many thanks for working on this!

Nice work! What do you think of possibility to add restore_command?

Is it a good idea to make a restore_command to be reloadable via SUGHUP
too?

On the one hand it looks reasonable since primary_conninfo got such
ability. On the other hand we don't exactly know whether the actual
process after reload config would use "new" command or "old" since it
may take a long time to finish running old command (in case of scp or
smth).

Also setting faulty restore_command lead to server termination, which is
rather unexpectedly.
If anyone makes some minor typo in restore_command, say write
restore_command='cp1 /mnt/srv/%f %p' instead of restore_command='cp
/mnt/srv/%f %p' and do SELECT pg_reload_conf() then server will
terminate.

Do you consider this feature useful? Any suggestions?

---
Best regards,
Maxim Orlov.

Attachments:

restore_command-sighup.patchtext/x-diff; name=restore_command-sighup.patchDownload
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a62d64eaa47..87fd5939246 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3699,7 +3699,7 @@ static struct config_string ConfigureNamesString[] =
 	},
 
 	{
-		{"restore_command", PGC_POSTMASTER, WAL_ARCHIVE_RECOVERY,
+		{"restore_command", PGC_SIGHUP, WAL_ARCHIVE_RECOVERY,
 			gettext_noop("Sets the shell command that will be called to retrieve an archived WAL file."),
 			NULL
 		},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 9cb571f7cc7..9c9091e601e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -253,7 +253,6 @@
 				# placeholders: %p = path of file to restore
 				#               %f = file name only
 				# e.g. 'cp /mnt/server/archivedir/%f %p'
-				# (change requires restart)
 #archive_cleanup_command = ''	# command to execute at every restartpoint
 #recovery_end_command = ''	# command to execute at completion of recovery
 
In reply to: Maxim Orlov (#76)
Re: allow online change primary_conninfo

Hello

Yep, I think it's useful and I already posted patch in this thread: /messages/by-id/3090621585393698@myt5-1466095fe4e5.qloud-c.yandex.net
Currently without consensus

regards, Sergei