From 336d5b671157f974cceef15385e394ca27dd58f2 Mon Sep 17 00:00:00 2001 From: Ayush Tiwari Date: Mon, 20 Apr 2026 00:52:00 +0530 Subject: [BUG] Fix race in online checksums launcher_exit() When pg_enable_data_checksums() is called twice before the first launcher starts, two launcher processes are registered. The second (redundant) launcher exits early after seeing that the shared launcher_running flag is already set, but its launcher_exit() callback unconditionally clears the shared DataChecksumState->launcher_running flag and may call SetDataChecksumsOff(). This allows a third launcher to start concurrently with the first, and can silently revert the cluster checksum state to off while the first launcher is still working. Fix by returning early from launcher_exit() when the process-local launcher_running flag is false, indicating this process never claimed the launcher role. --- src/backend/postmaster/datachecksum_state.c | 25 +++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index 18797a8ee3d..76f5aa00f2b 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -887,17 +887,24 @@ launcher_exit(int code, Datum arg) { abort_requested = false; - if (launcher_running) + /* + * Only perform cleanup if this process actually claimed the launcher role, + * as recorded in the process-local launcher_running flag. A redundant + * launcher that found another launcher already running will have exited + * early without setting that flag, and must not touch the shared state + * owned by the active launcher. 
+ */ + if (!launcher_running) + return; + + LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); + if (DataChecksumState->worker_pid != InvalidPid) { - LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); - if (DataChecksumState->worker_pid != InvalidPid) - { - ereport(LOG, - errmsg("data checksums launcher exiting while worker is still running, signalling worker")); - kill(DataChecksumState->worker_pid, SIGTERM); - } - LWLockRelease(DataChecksumsWorkerLock); + ereport(LOG, + errmsg("data checksums launcher exiting while worker is still running, signalling worker")); + kill(DataChecksumState->worker_pid, SIGTERM); } + LWLockRelease(DataChecksumsWorkerLock); /* * If the launcher is exiting before data checksums are enabled then set -- 2.34.1