From d49274218577f1687b7e26ed145b60f6b0fe3859 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Mon, 2 Feb 2026 11:12:04 +0800 Subject: [PATCH v11] Improve read_local_xlog_page_guts by replacing polling with latch-based waiting. Replace the inefficient polling loop in read_local_xlog_page_guts with the WaitForLSN facility provided by the xlogwait module, which is used when the requested WAL data is not yet available. This eliminates the wasteful sleep-and-recheck cycle (a 1 ms sleep per iteration) and improves responsiveness by waking a waiting process as soon as its target LSN becomes available. --- src/backend/access/transam/xlogutils.c | 47 ++++++++++++++++++++++---- src/backend/replication/walsender.c | 4 --- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 5fbe39133b8..4303e78280f 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -23,6 +23,7 @@ #include "access/xlogrecovery.h" #include "access/xlog_internal.h" #include "access/xlogutils.h" +#include "access/xlogwait.h" #include "miscadmin.h" #include "storage/fd.h" #include "storage/smgr.h" @@ -880,12 +881,7 @@ read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, loc = targetPagePtr + reqLen; /* - * Loop waiting for xlog to be available if necessary - * - * TODO: The walsender has its own version of this function, which uses a - * condition variable to wake up whenever WAL is flushed. We could use the - * same infrastructure here, instead of the check/sleep/repeat style of - * loop. + * Waiting for xlog to be available if necessary. */ while (1) { @@ -947,7 +943,44 @@ read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, } CHECK_FOR_INTERRUPTS(); - pg_usleep(1000L); + + /* + * Wait for LSN using appropriate method based on server state. 
+ */ + if (!RecoveryInProgress()) + { + /* Primary: wait for flush */ + WaitForLSN(WAIT_LSN_TYPE_PRIMARY_FLUSH, loc, -1); + } + else + { + /* Standby: wait for replay */ + WaitLSNResult result = WaitForLSN(WAIT_LSN_TYPE_STANDBY_REPLAY, loc, -1); + + switch (result) + { + case WAIT_LSN_RESULT_SUCCESS: + /* LSN was replayed, loop back to recheck timeline */ + break; + + case WAIT_LSN_RESULT_NOT_IN_RECOVERY: + + /* + * Promoted while waiting. This is the tricky case. + * We're now a primary, so loop back and use flush + * logic instead of replay logic. + */ + break; + + default: + elog(ERROR, "unexpected wait result"); + } + } + + /* + * Loop back to recheck everything. Timeline might have changed + * during our wait. + */ } else { diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index a0e6a3d200c..105d826c693 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1035,10 +1035,6 @@ StartReplication(StartReplicationCmd *cmd) /* * XLogReaderRoutine->page_read callback for logical decoding contexts, as a * walsender process. - * - * Inside the walsender we can do better than read_local_xlog_page, - * which has to do a plain sleep/busy loop, because the walsender's latch gets - * set every time WAL is flushed. */ static int logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, -- 2.51.0