From a6950d0b5592444b33035f03ec431efbd0d1a8d5 Mon Sep 17 00:00:00 2001 From: Anthony Hsu Date: Thu, 26 Jun 2025 22:58:59 -0700 Subject: [PATCH v2] Always enable standby timeout in ResolveRecoveryConflictWithBufferPin to ensure standby limit is honored. --- src/backend/storage/buffer/bufmgr.c | 7 +++ src/backend/storage/ipc/standby.c | 68 ++++++++++++++++++----------- src/include/storage/standby.h | 8 ++++ 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 667aa0c0c78..6fb0f491633 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -5721,6 +5721,13 @@ LockBufferForCleanup(Buffer buffer) /* Successfully acquired exclusive lock with pincount 1 */ UnlockBufHdr(bufHdr, buf_state); + /* + * Reset sleep time after sending + * PROCSIG_RECOVERY_CONFLICT_BUFFERPIN for the next buffer pin + * conflict. + */ + bufferpin_wait_us = BUFFERPIN_INITIAL_WAIT_US; + /* * Emit the log message if recovery conflict on buffer pin was * resolved but the startup process waited longer than diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 7fa8d9247e0..778c269b13c 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -41,6 +41,12 @@ int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; bool log_recovery_conflict_waits = false; +/* + * Sleep time after sending PROCSIG_RECOVERY_CONFLICT_BUFFERPIN. Reset in + * LockBufferForCleanup after successfully acquiring cleanup lock. + */ +int bufferpin_wait_us = BUFFERPIN_INITIAL_WAIT_US; + /* * Keep track of all the exclusive locks owned by original transactions. * For each known exclusive lock, there is a RecoveryLockEntry in the @@ -769,8 +775,8 @@ cleanup: * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup() * (when not InHotStandby) is performed here, for code clarity. * - * We either resolve conflicts immediately or set a timeout to wake us at - * the limit of our patience. + * We set a timeout to wake us at the limit of our patience. This will trigger + * immediately if the deadline has already passed. * * Resolve conflicts by sending a PROCSIG signal to all backends to check if * they hold one of the buffer pins that is blocking Startup process. If so, @@ -798,39 +804,32 @@ ResolveRecoveryConflictWithBufferPin(void) ltime = GetStandbyLimitTime(); - if (GetCurrentTimestamp() >= ltime && ltime != 0) + /* + * Wake up at ltime, and check for deadlocks as well if we will be + * waiting longer than deadlock_timeout + */ + EnableTimeoutParams timeouts[2]; + int cnt = 0; + + if (ltime != 0) { - /* - * We're already behind, so clear a path as quickly as possible. - */ - SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + timeouts[cnt].id = STANDBY_TIMEOUT; + timeouts[cnt].type = TMPARAM_AT; + timeouts[cnt].fin_time = ltime; + cnt++; } - else - { - /* - * Wake up at ltime, and check for deadlocks as well if we will be - * waiting longer than deadlock_timeout - */ - EnableTimeoutParams timeouts[2]; - int cnt = 0; - - if (ltime != 0) - { - timeouts[cnt].id = STANDBY_TIMEOUT; - timeouts[cnt].type = TMPARAM_AT; - timeouts[cnt].fin_time = ltime; - cnt++; - } + if (ltime == 0 || GetCurrentTimestamp() < ltime) + { got_standby_deadlock_timeout = false; timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT; timeouts[cnt].type = TMPARAM_AFTER; timeouts[cnt].delay_ms = DeadlockTimeout; cnt++; - - enable_timeouts(timeouts, cnt); } + enable_timeouts(timeouts, cnt); + /* * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted * by one of the timeouts established above. @@ -843,7 +842,24 @@ ResolveRecoveryConflictWithBufferPin(void) ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN); if (got_standby_delay_timeout) + { SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + + /* + * Sleep a bit to avoid busy-waiting and repeatedly sending + * PROCSIG_RECOVERY_CONFLICT_BUFFERPIN. + */ + pgstat_report_wait_start(WAIT_EVENT_BUFFER_PIN); + pg_usleep(bufferpin_wait_us); + pgstat_report_wait_end(); + + /* + * Progressively increase the sleep time but to no more than 1 second. + */ + bufferpin_wait_us *= 2; + if (bufferpin_wait_us > 1000000) + bufferpin_wait_us = 1000000; + } else if (got_standby_deadlock_timeout) { /* @@ -857,7 +873,7 @@ ResolveRecoveryConflictWithBufferPin(void) * buffer is unpinned or ltime is reached. This would increase the * workload in the startup process and backends. In practice it may * not be so harmful because the period that the buffer is kept pinned - * is basically no so long. But we should fix this? + * is basically not so long. But we should fix this? */ SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); } diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 24e2f5082bc..a3691a1cac7 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -25,6 +25,14 @@ extern PGDLLIMPORT int max_standby_archive_delay; extern PGDLLIMPORT int max_standby_streaming_delay; extern PGDLLIMPORT bool log_recovery_conflict_waits; +/* + * After sending a PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, we sleep for this long + * to give backends a chance to cancel before checking again if we can acquire + * the cleanup lock. + */ +#define BUFFERPIN_INITIAL_WAIT_US 1000; +extern int bufferpin_wait_us; + extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); -- 2.50.0.727.gbf7dc18ff4-goog