diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index fc63172efd..45f021fe9a 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4815,6 +4815,35 @@ ANY num_sync ( + standby_can_hibernate (boolean) + + standby_can_hibernate configuration parameter + + + + + Specifies whether standby workers will hibernate if there is no incoming + replication traffic and all changes have been applied. Hibernation will + reduce power consumption when idle, though workers will wake immediately when + new replication traffic arrives. + + + When hibernating, the startup process will take longer to notice that the + standby has been promoted, which could take up to 60s, rather than the + normal recheck cycle time of 5s. This will increase the failover time + in a high availability cluster configuration and also increase the + time required to restart the wal receiver should it be disconnected. + + + When hibernating, the wal receiver process will sleep for + wal_receiver_timeout / 2, rather than the normal cycle time of 100ms. + You may wish to increase the value of wal_receiver_timeout to + 120s or more to improve hibernation. + + + + @@ -4884,6 +4913,32 @@ ANY num_sync ( + subscription_can_hibernate (boolean) + + subscription_can_hibernate configuration parameter + + + + + Specifies whether subscription processes will hibernate if there is no incoming + replication traffic and all changes have been applied. Hibernation will + reduce power consumption when idle, though workers will wake immediately when + new replication traffic arrives. + + + When hibernating, the logical launcher process will take longer to notice + that subscription workers have disconnected before it restarts them. + + + When hibernating, the logical worker process will sleep for + wal_receiver_timeout / 2, rather than the normal cycle time of wal_writer_delay. + You may wish to increase the value of wal_receiver_timeout to + 120s or more to improve hibernation. 
+ + + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 958220c495..bd471504b7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -12829,6 +12829,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, case XLOG_FROM_STREAM: { bool havedata; + DECLARE_HIBERNATE_VARS(); /* * We should be able to move to XLOG_FROM_STREAM only in @@ -13013,14 +13014,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, } /* - * Wait for more WAL to arrive. Time out after 5 seconds + * Wait for more WAL to arrive. Time out allows startup * to react to a trigger file promptly and to check if the - * WAL receiver is still active. + * WAL receiver is still active. Hence, hibernation must + * be explicitly approved. */ + SET_DELAY_OR_HIBERNATE(havedata || !standby_can_hibernate, + wal_retrieve_retry_interval); (void) WaitLatch(&XLogCtl->recoveryWakeupLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - 5000L, WAIT_EVENT_RECOVERY_WAL_STREAM); + cur_timeout, + WAIT_EVENT_RECOVERY_WAL_STREAM); ResetLatch(&XLogCtl->recoveryWakeupLatch); break; } diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 681ef91b81..942cc4a07f 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -134,7 +134,6 @@ int Log_autovacuum_min_duration = 600000; /* the minimum allowed time between two awakenings of the launcher */ #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */ -#define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */ /* Flags to tell if we are in an autovacuum process */ static bool am_autovacuum_launcher = false; @@ -932,8 +931,8 @@ launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval *nap) * infinite sleep in strange cases like the system clock going backwards a * few years. 
*/ - if (nap->tv_sec > MAX_AUTOVAC_SLEEPTIME) - nap->tv_sec = MAX_AUTOVAC_SLEEPTIME; + if (nap->tv_sec > HIBERNATE_DELAY_SEC) + nap->tv_sec = HIBERNATE_DELAY_SEC; } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index d1f5d12eff..9566fd2b3d 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -60,12 +60,6 @@ */ int BgWriterDelay = 200; -/* - * Multiplier to apply to BgWriterDelay when we decide to hibernate. - * (Perhaps this needs to be configurable?) - */ -#define HIBERNATE_FACTOR 50 - /* * Interval in which standby snapshots are logged into the WAL stream, in * milliseconds. @@ -337,7 +331,7 @@ BackgroundWriterMain(void) /* Sleep ... */ (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - BgWriterDelay * HIBERNATE_FACTOR, + HIBERNATE_DELAY_MS, WAIT_EVENT_BGWRITER_HIBERNATE); /* Reset the notification request in case we timed out */ StrategyNotifyBgWriter(-1); diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index d916ed39a8..156a4b6404 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -53,8 +53,6 @@ * Timer definitions. * ---------- */ -#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the - * archive status directory; in seconds. */ #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a * failed archiver; in seconds. */ @@ -346,7 +344,7 @@ pgarch_MainLoop(void) /* * Sleep until a signal is received, or until a poll is forced by - * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or + * HIBERNATE_DELAY_SEC having passed since last_copy_time, or * until postmaster dies. 
*/ if (!time_to_stop) /* Don't wait during last iteration */ @@ -354,7 +352,7 @@ pgarch_MainLoop(void) pg_time_t curtime = (pg_time_t) time(NULL); int timeout; - timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time); + timeout = HIBERNATE_DELAY_SEC - (curtime - last_copy_time); if (timeout > 0) { int rc; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ce90877154..6de5cb3a9f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -1636,7 +1636,7 @@ DetermineSleepTime(struct timeval *timeout) } else { - timeout->tv_sec = 60; + timeout->tv_sec = HIBERNATE_DELAY_SEC; timeout->tv_usec = 0; } return; @@ -1694,15 +1694,15 @@ DetermineSleepTime(struct timeval *timeout) timeout->tv_usec = microsecs; /* Ensure we don't exceed one minute */ - if (timeout->tv_sec > 60) + if (timeout->tv_sec > HIBERNATE_DELAY_SEC) { - timeout->tv_sec = 60; + timeout->tv_sec = HIBERNATE_DELAY_SEC; timeout->tv_usec = 0; } } else { - timeout->tv_sec = 60; + timeout->tv_sec = HIBERNATE_DELAY_SEC; timeout->tv_usec = 0; } } diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 102fa2a089..7f0b74d0de 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -70,13 +70,7 @@ int WalWriterDelay = 200; int WalWriterFlushAfter = 128; -/* - * Number of do-nothing loops before lengthening the delay time, and the - * multiplier to apply to WalWriterDelay when we do decide to hibernate. - * (Perhaps these need to be configurable?) - */ -#define LOOPS_UNTIL_HIBERNATE 50 -#define HIBERNATE_FACTOR 25 +DECLARE_HIBERNATE_VARS(); /* Prototypes for private functions */ static void HandleWalWriterInterrupts(void); @@ -210,7 +204,7 @@ WalWriterMain(void) /* * Reset hibernation state after any error. 
*/ - left_till_hibernate = LOOPS_UNTIL_HIBERNATE; + RESET_TO_NON_HIBERNATE(); hibernating = false; SetWalWriterSleeping(false); @@ -225,8 +219,6 @@ WalWriterMain(void) */ for (;;) { - long cur_timeout; - /* * Advertise whether we might hibernate in this cycle. We do this * before resetting the latch to ensure that any async commits will @@ -252,24 +244,11 @@ WalWriterMain(void) * Do what we're here for; then, if XLogBackgroundFlush() found useful * work to do, reset hibernation counter. */ - if (XLogBackgroundFlush()) - left_till_hibernate = LOOPS_UNTIL_HIBERNATE; - else if (left_till_hibernate > 0) - left_till_hibernate--; + SET_DELAY_OR_HIBERNATE(XLogBackgroundFlush(), WalWriterDelay); /* Send WAL statistics to the stats collector */ pgstat_send_wal(false); - /* - * Sleep until we are signaled or WalWriterDelay has elapsed. If we - * haven't done anything useful for quite some time, lengthen the - * sleep time so as to reduce the server's idle power consumption. - */ - if (left_till_hibernate > 0) - cur_timeout = WalWriterDelay; /* in ms */ - else - cur_timeout = WalWriterDelay * HIBERNATE_FACTOR; - (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, cur_timeout, diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 7b473903a6..d89e084352 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -48,11 +48,9 @@ #include "utils/snapmgr.h" #include "utils/timeout.h" -/* max sleep time between cycles (3min) */ -#define DEFAULT_NAPTIME_PER_CYCLE 180000L - int max_logical_replication_workers = 4; int max_sync_workers_per_subscription = 2; +bool subscription_can_hibernate = false; LogicalRepWorker *MyLogicalRepWorker = NULL; @@ -802,6 +800,7 @@ void ApplyLauncherMain(Datum main_arg) { TimestampTz last_start_time = 0; + DECLARE_HIBERNATE_VARS(); ereport(DEBUG1, (errmsg_internal("logical replication launcher started"))); @@ -831,11 +830,12 @@ 
ApplyLauncherMain(Datum main_arg) MemoryContext subctx; MemoryContext oldctx; TimestampTz now; - long wait_time = DEFAULT_NAPTIME_PER_CYCLE; + bool work_done; CHECK_FOR_INTERRUPTS(); now = GetCurrentTimestamp(); + work_done = false; /* Limit the start retry to once a wal_retrieve_retry_interval */ if (TimestampDifferenceExceeds(last_start_time, now, @@ -866,7 +866,7 @@ ApplyLauncherMain(Datum main_arg) if (w == NULL) { last_start_time = now; - wait_time = wal_retrieve_retry_interval; + work_done = true; logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner, InvalidOid); @@ -886,13 +886,16 @@ ApplyLauncherMain(Datum main_arg) * usually means crash of the worker, so we should retry in * wal_retrieve_retry_interval again. */ - wait_time = wal_retrieve_retry_interval; + work_done = true; } + SET_DELAY_OR_HIBERNATE(work_done || !subscription_can_hibernate, + wal_retrieve_retry_interval); + /* Wait for more work. */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - wait_time, + cur_timeout, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN); if (rc & WL_LATCH_SET) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index d77bb32bb9..e8d91d541f 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -194,8 +194,6 @@ #include "utils/syscache.h" #include "utils/timeout.h" -#define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */ - typedef struct FlushPosition { dlist_node node; @@ -2566,6 +2564,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) bool ping_sent = false; TimeLineID tli; ErrorContextCallback errcallback; + DECLARE_HIBERNATE_VARS(); /* * Init the ApplyMessageContext which we clean up after each replication @@ -2602,7 +2601,6 @@ LogicalRepApplyLoop(XLogRecPtr last_received) int len; char *buf = NULL; bool endofstream = false; - long wait_time; CHECK_FOR_INTERRUPTS(); @@ -2636,6 +2634,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) 
/* Reset timeout. */ last_recv_timestamp = GetCurrentTimestamp(); ping_sent = false; + RESET_TO_NON_HIBERNATE(); /* Ensure we are reading the data into our memory context. */ MemoryContextSwitchTo(ApplyMessageContext); @@ -2724,15 +2723,15 @@ LogicalRepApplyLoop(XLogRecPtr last_received) * no particular urgency about waking up unless we get data or a * signal. */ - if (!dlist_is_empty(&lsn_mapping)) - wait_time = WalWriterDelay; - else - wait_time = NAPTIME_PER_CYCLE; + SET_DELAY_OR_HIBERNATE_OPT(!dlist_is_empty(&lsn_mapping) || !subscription_can_hibernate, + WalWriterDelay, + wal_receiver_timeout / 2); rc = WaitLatchOrSocket(MyLatch, WL_SOCKET_READABLE | WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - fd, wait_time, + fd, + cur_timeout, WAIT_EVENT_LOGICAL_APPLY_MAIN); if (rc & WL_LATCH_SET) diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index b39fce8c23..d028359684 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -89,12 +89,13 @@ int wal_receiver_status_interval; int wal_receiver_timeout; bool hot_standby_feedback; +bool standby_can_hibernate = false; /* libpqwalreceiver connection */ static WalReceiverConn *wrconn = NULL; WalReceiverFunctionsType *WalReceiverFunctions = NULL; -#define NAPTIME_PER_CYCLE 100 /* max sleep time between cycles (100ms) */ +#define WALRECEIVER_WAIT_PER_CYCLE 100 /* normal sleep time between cycles (100ms) */ /* * These variables are used similarly to openLogFile/SegNo, @@ -184,6 +185,7 @@ WalReceiverMain(void) char *err; char *sender_host = NULL; int sender_port = 0; + DECLARE_HIBERNATE_VARS(); /* * WalRcv should be set up already (if we are a backend, we inherit this @@ -416,6 +418,7 @@ WalReceiverMain(void) /* Initialize the last recv timestamp */ last_recv_timestamp = GetCurrentTimestamp(); ping_sent = false; + RESET_TO_NON_HIBERNATE(); /* Loop until end-of-streaming or error */ for (;;) @@ -465,6 +468,7 @@ WalReceiverMain(void) ping_sent 
= false; XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1, startpointTLI); + RESET_TO_NON_HIBERNATE(); } else if (len == 0) break; @@ -496,6 +500,11 @@ WalReceiverMain(void) if (endofwal) break; + /* If we get here, we're idle so hibernate, if allowed */ + SET_DELAY_OR_HIBERNATE_OPT(false || !standby_can_hibernate, + WALRECEIVER_WAIT_PER_CYCLE, + wal_receiver_timeout / 2); + /* * Ideally we would reuse a WaitEventSet object repeatedly * here to avoid the overheads of WaitLatchOrSocket on epoll @@ -512,7 +521,7 @@ WalReceiverMain(void) WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT | WL_LATCH_SET, wait_fd, - NAPTIME_PER_CYCLE, + cur_timeout, WAIT_EVENT_WAL_RECEIVER_MAIN); if (rc & WL_LATCH_SET) { diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f505413a7f..98ddcbd9bb 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1405,6 +1405,24 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"standby_can_hibernate", PGC_SIGHUP, REPLICATION_STANDBY, + gettext_noop("Allow standby workers to hibernate if idle for long periods."), + NULL + }, + &standby_can_hibernate, + false, + NULL, NULL, NULL + }, + { + {"subscription_can_hibernate", PGC_SIGHUP, REPLICATION_SUBSCRIBERS, + gettext_noop("Allow subscription procs to hibernate if idle for long periods."), + NULL + }, + &subscription_can_hibernate, + false, + NULL, NULL, NULL + }, { {"restart_after_crash", PGC_SIGHUP, ERROR_HANDLING_OPTIONS, gettext_noop("Reinitialize server after backend crash."), diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 92f73a55b8..f5eca294f5 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -28,6 +28,8 @@ extern int wal_receiver_status_interval; extern int wal_receiver_timeout; extern bool hot_standby_feedback; +extern bool standby_can_hibernate; +extern bool subscription_can_hibernate; /* * MAXCONNINFO: maximum 
size of a connection string.
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index 3aa7b33834..18a8522376 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -83,6 +83,30 @@
  * only, so using any latch other than the process latch effectively precludes
  * use of any generic handler.
  *
+ * Since not all servers have a 24/7 duty cycle, if a worker has no current
+ * work then it can be enhanced to eventually hibernate. The goal here is
+ * to reduce the CPU wakeups and thus reduce electrical power consumption,
+ * making a small contribution to reducing global warming. To support
+ * hibernation, we add some additional macros to judge when, and if, to
+ * switch from a normal loop wait to hibernate timeout (default 60s).
+ * Using these macros helps have a common design pattern for hibernation.
+ * 1. Add DECLARE_HIBERNATE_VARS(); outside the wait loop to declare the state
+ * 2. Add SET_DELAY_OR_HIBERNATE() to eventually hibernate if idle
+ * 3. Use the variable "cur_timeout" in WaitLatch()
+ * Hibernation for this worker is independent of any other worker.
+ *
+ * #define normal_wait_timeout 100L
+ * DECLARE_HIBERNATE_VARS();
+ * for (;;)
+ * {
+ *     if (work to do)
+ *         Do Stuff(); // in particular, exit loop if some condition satisfied
+ *     SET_DELAY_OR_HIBERNATE(work_to_do, normal_wait_timeout);
+ *     WaitLatch(cur_timeout);
+ *     ResetLatch();
+ * }
+ * A working example is shown in src/test/modules/worker_spi/worker_spi.c
+ * as well as in code for workers in src/backend/postmaster et al.
  *
  * WaitEventSets allow to wait for latches being set and additional events -
  * postmaster dying and socket readiness of several sockets currently - at the
@@ -139,6 +163,86 @@ typedef struct Latch
 							 WL_SOCKET_WRITEABLE | \
 							 WL_SOCKET_CONNECTED)
 
+/*
+ * Common design pattern for hibernation.
+ *
+ * Sleep until we are signaled or normal_delay has elapsed.  If we
+ * haven't done any work for quite some time, lengthen the sleep
+ * time so as to reduce the server's idle power consumption.
+ *
+ * Avoids the use of static variables, to allow each process to
+ * have its own private, independent counters.  The variables declared
+ * by DECLARE_HIBERNATE_VARS() must outlive a single loop iteration
+ * (declare them at file scope or at the top of the function, not inside
+ * the loop body); otherwise the idle counter is re-initialized on every
+ * pass and hibernation is never reached.
+ *
+ * The statement-like macros each expand to one do { } while (0) statement,
+ * so they are safe in unbraced if/else bodies and need a trailing semicolon.
+ */
+
+#define HIBERNATE_DELAY_SEC		60
+#define HIBERNATE_DELAY_MS		(1000L * HIBERNATE_DELAY_SEC)
+#define LOOPS_UNTIL_HIBERNATE	50
+
+/*
+ * SET_DELAY_OR_HIBERNATE_OPT
+ *		Store the next wait timeout, in milliseconds, into cur_timeout.
+ *
+ * work_done: evaluated exactly once; true resets the idle-loop counter.
+ * normal_delay: timeout used until LOOPS_UNTIL_HIBERNATE consecutive idle
+ *		loops have been seen.
+ * hib_delay: timeout used once hibernating.  A value <= 0 (for example
+ *		wal_receiver_timeout / 2 when that timeout is set to 0) would turn
+ *		the caller's wait into a busy loop, so HIBERNATE_DELAY_MS is used
+ *		instead.
+ */
+#define SET_DELAY_OR_HIBERNATE_OPT(work_done, normal_delay, hib_delay) \
+	do { \
+		if (work_done) \
+		{ \
+			hibernate_logged = false; \
+			left_till_hibernate = LOOPS_UNTIL_HIBERNATE; \
+		} \
+		else if (left_till_hibernate > 0) \
+			left_till_hibernate--; \
+		if (left_till_hibernate > 0) \
+			cur_timeout = (normal_delay); \
+		else \
+		{ \
+			cur_timeout = (hib_delay); \
+			if (cur_timeout <= 0) \
+				cur_timeout = HIBERNATE_DELAY_MS; \
+			if (!hibernate_logged) \
+				elog(DEBUG3, "hibernating for %ld", cur_timeout); \
+			hibernate_logged = true; \
+		} \
+	} while (0)
+
+/* As above, hibernating for the standard HIBERNATE_DELAY_MS. */
+#define SET_DELAY_OR_HIBERNATE(work_done, normal_delay) \
+	SET_DELAY_OR_HIBERNATE_OPT(work_done, normal_delay, HIBERNATE_DELAY_MS)
+
+/* True once the idle-loop counter has run down to zero. */
+#define AM_HIBERNATING()	(left_till_hibernate == 0)
+
+/*
+ * Declare the per-caller hibernation state.  No trailing semicolon here: the
+ * caller's "DECLARE_HIBERNATE_VARS();" supplies it.
+ */
+#define DECLARE_HIBERNATE_VARS() \
+	bool		hibernate_logged = false; \
+	int			left_till_hibernate = LOOPS_UNTIL_HIBERNATE; \
+	long		cur_timeout
+
+/* Return to normal-delay mode, e.g. after receiving data or after an error. */
+#define RESET_TO_NON_HIBERNATE() \
+	do { \
+		hibernate_logged = false; \
+		left_till_hibernate = LOOPS_UNTIL_HIBERNATE; \
+	} while (0)
+
+
 typedef struct WaitEvent
 {
 	int			pos;			/* position in the event data structure */
diff --git a/src/test/modules/worker_spi/worker_spi.c b/src/test/modules/worker_spi/worker_spi.c
index 05ced63780..1d05634b86 100644
--- a/src/test/modules/worker_spi/worker_spi.c
+++ b/src/test/modules/worker_spi/worker_spi.c
@@ -46,6 +46,8 @@ PG_MODULE_MAGIC;
 
 PG_FUNCTION_INFO_V1(worker_spi_launch);
+DECLARE_HIBERNATE_VARS(); + void _PG_init(void); void worker_spi_main(Datum) pg_attribute_noreturn(); @@ -137,6 +139,7 @@ worker_spi_main(Datum main_arg) worktable *table; StringInfoData buf; char name[20]; + bool work_done = true; table = palloc(sizeof(worktable)); sprintf(name, "schema%d", index); @@ -191,6 +194,13 @@ worker_spi_main(Datum main_arg) { int ret; + /* + * Use the standard design pattern for wait time/hibernation. + * After 50 consecutive loops with work_done=false the wait time + * will be set to the standard hibernation timeout of 60s. + */ + SET_DELAY_OR_HIBERNATE(work_done, worker_spi_naptime * 1000L); + /* * Background workers mustn't call usleep() or any direct equivalent: * instead, they may wait on their process latch, which sleeps as @@ -199,7 +209,7 @@ worker_spi_main(Datum main_arg) */ (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - worker_spi_naptime * 1000L, + cur_timeout, PG_WAIT_EXTENSION); ResetLatch(MyLatch); @@ -256,7 +266,10 @@ worker_spi_main(Datum main_arg) elog(LOG, "%s: count in %s.%s is now %d", MyBgworkerEntry->bgw_name, table->schema, table->name, val); + work_done = true; } + else + work_done = false; /* * And finish our transaction.