*** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *************** *** 2175,2180 **** SET ENABLE_SEQSCAN TO OFF; --- 2175,2204 ---- + + + wal_receiver_replication_timeout (integer) + + wal_receiver_replication_timeout configuration parameter + + + + Terminate replication connections that are inactive longer + than the specified number of milliseconds. This is useful for + the receiving standby server to detect a primary node crash or network outage. + A value of zero disables the timeout mechanism. This parameter + can only be set in + the postgresql.conf file or on the server command line. + The default value is 60 seconds. + + + To prevent connections from being terminated prematurely, + wal_send_status_interval + must be enabled on the primary, and its value must be less than the + value of wal_receiver_replication_timeout. + + + *************** *** 2397,2402 **** SET ENABLE_SEQSCAN TO OFF; --- 2421,2450 ---- + + + wal_send_status_interval (integer) + + wal_send_status_interval configuration parameter + + + + Specifies the minimum frequency for the WAL sender + process on the primary to send a heartbeat message to the standby. + This parameter's value is the maximum interval, in seconds, between heartbeats. + Heartbeats are sent each time the WAL sender receives a response from the standby, or at least as + often as specified by this parameter. Setting this parameter to zero + disables these heartbeats completely. This parameter can only be set in + the postgresql.conf file or on the server command line. + The default value is 10 seconds. + + + When wal_receiver_replication_timeout is enabled on a receiving server, + wal_send_status_interval must be enabled, and its value + must be less than the value of wal_receiver_replication_timeout. 
+ + + hot_standby_feedback (boolean) *** a/src/backend/replication/walreceiver.c --- b/src/backend/replication/walreceiver.c *************** *** 62,67 **** walrcv_connect_type walrcv_connect = NULL; --- 62,69 ---- walrcv_receive_type walrcv_receive = NULL; walrcv_send_type walrcv_send = NULL; walrcv_disconnect_type walrcv_disconnect = NULL; + int wal_receiver_replication_timeout = 60 * 1000; /* maximum time to receive one + * WAL data message */ #define NAPTIME_PER_CYCLE 100 /* max sleep time between cycles (100ms) */ *************** *** 174,179 **** WalReceiverMain(void) --- 176,184 ---- /* use volatile pointer to prevent code rearrangement */ volatile WalRcvData *walrcv = WalRcv; + TimestampTz last_recv_timestamp; + TimestampTz timeout = 0; + /* * WalRcv should be set up already (if we are a backend, we inherit this * by fork() or EXEC_BACKEND mechanism from the postmaster). *************** *** 282,287 **** WalReceiverMain(void) --- 287,295 ---- MemSet(&reply_message, 0, sizeof(reply_message)); MemSet(&feedback_message, 0, sizeof(feedback_message)); + /* Initialize the last recv timestamp */ + last_recv_timestamp = GetCurrentTimestamp(); + /* Loop until end-of-streaming or error */ for (;;) { *************** *** 316,327 **** WalReceiverMain(void) --- 324,343 ---- /* Wait a while for data to arrive */ if (walrcv_receive(NAPTIME_PER_CYCLE, &type, &buf, &len)) { + /* Something is received from master, so reset last receive time*/ + last_recv_timestamp = GetCurrentTimestamp(); + /* Accept the received data, and process it */ XLogWalRcvProcessMsg(type, buf, len); /* Receive any more data we can without sleeping */ while (walrcv_receive(0, &type, &buf, &len)) + { + /* Something is received from master, so reset last receive time*/ + last_recv_timestamp = GetCurrentTimestamp(); + XLogWalRcvProcessMsg(type, buf, len); + } /* Let the master know that we received some data. 
*/ XLogWalRcvSendReply(); *************** *** 334,339 **** WalReceiverMain(void) --- 350,369 ---- } else { + /* Check if time since last receive from standby has reached the configured limit + * No need to check if it is disabled by giving value as 0*/ + if (wal_receiver_replication_timeout > 0) + { + timeout = TimestampTzPlusMilliseconds(last_recv_timestamp, + wal_receiver_replication_timeout); + + if (GetCurrentTimestamp() >= timeout) + { + ereport(ERROR, + (errmsg("Could not receive any message from WalSender for configured timeout period"))); + } + } + /* * We didn't receive anything new, but send a status update to the * master anyway, to report any progress in applying WAL. *** a/src/backend/replication/walsender.c --- b/src/backend/replication/walsender.c *************** *** 82,87 **** bool am_cascading_walsender = false; /* Am I cascading WAL to --- 82,89 ---- int max_wal_senders = 0; /* the maximum number of concurrent walsenders */ int replication_timeout = 60 * 1000; /* maximum time to send one * WAL data message */ + int wal_send_status_interval = 10 * 1000; /* send replies at least this often to standby */ + /* * State for WalSndWakeupRequest */ *************** *** 832,843 **** WalSndLoop(void) long sleeptime = 10000; /* 10 s */ int wakeEvents; wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT; if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; ! else if (MyWalSnd->sendKeepalive) { WalSndKeepalive(output_message); /* Try to flush pending output to the client */ --- 834,855 ---- long sleeptime = 10000; /* 10 s */ int wakeEvents; + /* sleeptime should be equal to wal send interval if it is greater than zero*/ + if (wal_send_status_interval > 0) + { + sleeptime = wal_send_status_interval*1000; + } + wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT; + /* + * send keepalive message if sendkeepalive is enabled or WAL send status + * interval is greater than zero. 
+ */ if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; ! else if (MyWalSnd->sendKeepalive || (wal_send_status_interval > 0)) { WalSndKeepalive(output_message); /* Try to flush pending output to the client */ *************** *** 850,856 **** WalSndLoop(void) { timeout = TimestampTzPlusMilliseconds(last_reply_timestamp, replication_timeout); ! sleeptime = 1 + (replication_timeout / 10); } /* Sleep until something happens or replication timeout */ --- 862,871 ---- { timeout = TimestampTzPlusMilliseconds(last_reply_timestamp, replication_timeout); ! if (wal_send_status_interval <= 0) ! { ! sleeptime = 1 + (replication_timeout / 10); ! } } /* Sleep until something happens or replication timeout */ *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** *** 1596,1601 **** static struct config_int ConfigureNamesInt[] = --- 1596,1612 ---- }, { + {"wal_receiver_replication_timeout", PGC_SIGHUP, REPLICATION_STANDBY, + gettext_noop("Sets the maximum wait time to receive data from master."), + NULL, + GUC_UNIT_MS + }, + &wal_receiver_replication_timeout, + 60 * 1000, 0, INT_MAX, + NULL, NULL, NULL + }, + + { {"max_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Sets the maximum number of concurrent connections."), NULL *************** *** 2030,2035 **** static struct config_int ConfigureNamesInt[] = --- 2041,2057 ---- }, { + {"wal_send_status_interval", PGC_SIGHUP, REPLICATION_SENDING, + gettext_noop("Sets the maximum interval between WAL sender reports to the standby."), + NULL, + GUC_UNIT_S + }, + &wal_send_status_interval, + 10, 0, INT_MAX / 1000, + NULL, NULL, NULL + }, + + { {"commit_delay", PGC_USERSET, WAL_SETTINGS, gettext_noop("Sets the delay in microseconds between transaction commit and " "flushing WAL to disk."), *************** *** 2381,2387 **** static struct config_int ConfigureNamesInt[] = 1024, 100, 102400, NULL, NULL, NULL }, ! 
/* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL --- 2403,2409 ---- 1024, 100, 102400, NULL, NULL, NULL }, ! /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL *** a/src/backend/utils/misc/postgresql.conf.sample --- b/src/backend/utils/misc/postgresql.conf.sample *************** *** 220,225 **** --- 220,227 ---- # comma-separated list of application_name # from standby(s); '*' = all #vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed + #wal_send_status_interval = 10s # send replies at least this often to standby + # in seconds; 0 disables # - Standby Servers - *************** *** 234,242 **** # when reading streaming WAL; # -1 allows indefinite delay #wal_receiver_status_interval = 10s # send replies at least this often ! # 0 disables #hot_standby_feedback = off # send info from standby to prevent # query conflicts #------------------------------------------------------------------------------ --- 236,246 ---- # when reading streaming WAL; # -1 allows indefinite delay #wal_receiver_status_interval = 10s # send replies at least this often ! # in seconds; 0 disables #hot_standby_feedback = off # send info from standby to prevent # query conflicts + #wal_receiver_replication_timeout = 60s # in milliseconds; 0 disables; time + # till receiver waits for communication from master. #------------------------------------------------------------------------------ *** a/src/include/replication/walreceiver.h --- b/src/include/replication/walreceiver.h *************** *** 19,24 **** --- 19,25 ---- extern int wal_receiver_status_interval; extern bool hot_standby_feedback; + extern int wal_receiver_replication_timeout; /* * MAXCONNINFO: maximum size of a connection string. 
*** a/src/include/replication/walsender.h --- b/src/include/replication/walsender.h *************** *** 26,31 **** extern bool wake_wal_senders; --- 26,33 ---- /* user-settable parameters */ extern int max_wal_senders; extern int replication_timeout; + extern int wal_send_status_interval; + extern void WalSenderMain(void) __attribute__((noreturn)); extern void WalSndSignals(void);