From 8694c31dee28606448edfa5b0375fad8a7d619bd Mon Sep 17 00:00:00 2001 From: David Steele Date: Wed, 18 Mar 2026 07:11:13 +0000 Subject: Add pg_control flag to prevent recovery without backup_label. Harden recovery by adding a flag to pg_control to indicate that backup_label is required. This prevents the user from deleting backup_label resulting in an inconsistent recovery. Another advantage is that the copy of pg_control used by pg_basebackup is guaranteed not to be torn. This functionality is limited to pg_basebackup (or any software comfortable with modifying pg_control). Control and catalog version bumps are required. --- doc/src/sgml/func/func-info.sgml | 5 ++++ src/backend/access/transam/xlog.c | 36 +++++++++++++++++++++++ src/backend/access/transam/xlogrecovery.c | 19 +++++++++++- src/backend/backup/basebackup.c | 15 ++++------ src/backend/utils/misc/pg_controldata.c | 7 +++-- src/bin/pg_controldata/pg_controldata.c | 2 ++ src/bin/pg_resetwal/pg_resetwal.c | 1 + src/bin/pg_rewind/pg_rewind.c | 1 + src/include/access/xlog.h | 1 + src/include/catalog/pg_control.h | 4 +++ src/include/catalog/pg_proc.dat | 6 ++-- src/test/recovery/t/002_archiving.pl | 20 +++++++++++++ 12 files changed, 102 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml index 5b5f1f3c5df..b1b49ce7b07 100644 --- a/doc/src/sgml/func/func-info.sgml +++ b/doc/src/sgml/func/func-info.sgml @@ -3661,6 +3661,11 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} boolean + + backup_label_required + boolean + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f5c9a34374d..87ee245e3f4 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9592,6 +9592,42 @@ do_pg_abort_backup(int code, Datum arg) } } +/* + * Create a consistent copy of control data to be used for backup and update it + * to require a backup label for recovery. Also recalculate the CRC. + */ +void +backup_control_file(uint8_t *controlFile) +{ + ControlFileData *controlData = ((ControlFileData *)controlFile); + + memset(controlFile, 0, PG_CONTROL_FILE_SIZE); + + LWLockAcquire(ControlFileLock, LW_SHARED); + memcpy(controlFile, ControlFile, sizeof(ControlFileData)); + +#ifdef USE_ASSERT_CHECKING + /* + * Verify that the contents of pg_control are the same in memory as on disk + */ + { + bool crc_ok; + ControlFileData *dataDisk = get_controlfile(DataDir, &crc_ok); + + Assert(crc_ok && + memcmp(dataDisk, controlFile, sizeof(ControlFileData)) == 0); + } +#endif + + LWLockRelease(ControlFileLock); + + controlData->backupLabelRequired = true; + + INIT_CRC32C(controlData->crc); + COMP_CRC32C(controlData->crc, controlFile, offsetof(ControlFileData, crc)); + FIN_CRC32C(controlData->crc); +} + /* * Register a handler that will warn about unterminated backups at end of * session, unless this has already been done. diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6d2c4a86b96..bd702e895d7 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -647,7 +647,14 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, } else { - /* No backup_label file has been found if we are here. */ + /* + * No backup_label file has been found if we are here. Error if the + * control file requires backup_label. + */ + if (ControlFile->backupLabelRequired) + ereport(FATAL, + (errmsg("could not find backup_label required for recovery"), + errhint("backup_label must be present for recovery to proceed"))); /* * If tablespace_map file is present without backup_label file, there @@ -927,11 +934,21 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, * * Any other state indicates that the backup somehow became corrupted * and we can't sensibly continue with recovery. + * + * backupLabelRequired is set to false since backup_label is no longer + * required once pg_control has been updated on disk. If recovery + * terminates abnormally between when pg_control is updated and + * backup_label is renamed then on restart pg_control will be + * reinitialized from backup_label. If the user manually deletes + * backup_label before restarting then recovery will proceed with the + * contents of pg_control just as it would if the crash had happened + * directly after backup_label rename. */ if (haveBackupLabel) { ControlFile->backupStartPoint = checkPoint.redo; ControlFile->backupEndRequired = backupEndRequired; + ControlFile->backupLabelRequired = false; if (backupFromStandby) { diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index ab1fbae8001..c4f7827f866 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -23,6 +23,7 @@ #include "backup/basebackup_incremental.h" #include "backup/basebackup_sink.h" #include "backup/basebackup_target.h" +#include "catalog/pg_control.h" #include "catalog/pg_tablespace_d.h" #include "commands/defrem.h" #include "common/compression.h" @@ -332,9 +333,9 @@ perform_base_backup(basebackup_options *opt, bbsink *sink, if (ti->path == NULL) { - struct stat statbuf; bool sendtblspclinks = true; char *backup_label; + uint8_t controlFile[PG_CONTROL_FILE_SIZE]; bbsink_begin_archive(sink, "base.tar"); @@ -357,14 +358,10 @@ perform_base_backup(basebackup_options *opt, bbsink *sink, sendtblspclinks, &manifest, InvalidOid, ib); /* ... and pg_control after everything else. */ - if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - XLOG_CONTROL_FILE))); - sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, - false, InvalidOid, InvalidOid, - InvalidRelFileNumber, 0, &manifest, 0, NULL, 0); + backup_control_file(controlFile); + sendFileWithContent(sink, XLOG_CONTROL_FILE, + (char *)controlFile, PG_CONTROL_FILE_SIZE, + &manifest); } else { diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index c6d9cbb1577..c2c19eb77df 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -162,8 +162,8 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) Datum pg_control_recovery(PG_FUNCTION_ARGS) { - Datum values[5]; - bool nulls[5]; + Datum values[6]; + bool nulls[6]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; @@ -195,6 +195,9 @@ pg_control_recovery(PG_FUNCTION_ARGS) values[4] = BoolGetDatum(ControlFile->backupEndRequired); nulls[4] = false; + values[5] = BoolGetDatum(ControlFile->backupLabelRequired); + nulls[5] = false; + htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index a4060309ae0..5919dd58fed 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -301,6 +301,8 @@ main(int argc, char *argv[]) LSN_FORMAT_ARGS(ControlFile->backupEndPoint)); printf(_("End-of-backup record required: %s\n"), ControlFile->backupEndRequired ? _("yes") : _("no")); + printf(_("Backup label required: %s\n"), + ControlFile->backupLabelRequired ? _("yes") : _("no")); printf(_("wal_level setting: %s\n"), wal_level_str(ControlFile->wal_level)); printf(_("wal_log_hints setting: %s\n"), diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index ab766c34d4b..768a4ed2f18 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -918,6 +918,7 @@ RewriteControlFile(void) ControlFile.backupStartPoint = InvalidXLogRecPtr; ControlFile.backupEndPoint = InvalidXLogRecPtr; ControlFile.backupEndRequired = false; + ControlFile.backupLabelRequired = false; /* * Force the defaults for max_* settings. The values don't really matter diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 9d745d4b25b..509b9e80a21 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -736,6 +736,7 @@ perform_rewind(filemap_t *filemap, rewind_source *source, ControlFile_new.minRecoveryPoint = endrec; ControlFile_new.minRecoveryPointTLI = endtli; ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY; + ControlFile_new.backupLabelRequired = true; if (!dry_run) update_controlfile(datadir_target, &ControlFile_new, do_sync); } diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index dcc12eb8cbe..469739aa23f 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -313,6 +313,7 @@ extern void do_pg_backup_start(const char *backupidstr, bool fast, StringInfo tblspcmapfile); extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); extern void do_pg_abort_backup(int code, Datum arg); +extern void backup_control_file(uint8_t *controlFile); extern void register_persistent_abort_backup_handler(void); extern SessionBackupState get_backup_status(void); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 77a661e818b..d9841f58108 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -166,12 +166,16 @@ typedef struct ControlFileData * If backupEndRequired is true, we know for sure that we're restoring * from a backup, and must see a backup-end record before we can safely * start up. + * + * If backupLabelRequired is true, then a backup_label file must be + * present in order for recovery to proceed. */ XLogRecPtr minRecoveryPoint; TimeLineID minRecoveryPointTLI; XLogRecPtr backupStartPoint; XLogRecPtr backupEndPoint; bool backupEndRequired; + bool backupLabelRequired; /* * Parameter settings that determine if the WAL can be used for archival diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fc8d82665b8..999ea2ac801 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12504,9 +12504,9 @@ { oid => '3443', descr => 'pg_controldata recovery state information as a function', proname => 'pg_control_recovery', provolatile => 'v', prorettype => 'record', - proargtypes => '', proallargtypes => '{pg_lsn,int4,pg_lsn,pg_lsn,bool}', - proargmodes => '{o,o,o,o,o}', - proargnames => '{min_recovery_end_lsn,min_recovery_end_timeline,backup_start_lsn,backup_end_lsn,end_of_backup_record_required}', + proargtypes => '', proallargtypes => '{pg_lsn,int4,pg_lsn,pg_lsn,bool,bool}', + proargmodes => '{o,o,o,o,o,o}', + proargnames => '{min_recovery_end_lsn,min_recovery_end_timeline,backup_start_lsn,backup_end_lsn,end_of_backup_record_required,backup_label_required}', prosrc => 'pg_control_recovery' }, { oid => '3444', diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl index aa40f58e6d6..9963d13473e 100644 --- a/src/test/recovery/t/002_archiving.pl +++ b/src/test/recovery/t/002_archiving.pl @@ -41,6 +41,26 @@ $node_standby->append_conf( archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file' recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file' )); + +# Rename backup_label to verify that recovery will not start without it +rename("$data_dir/backup_label", "$data_dir/backup_label.tmp") + or BAIL_OUT "could not move $data_dir/backup_label"; + +my $res = run_log( + [ + 'pg_ctl', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, 'start' + ]); +ok(!$res, 'invalid recovery startup fails'); + +my $logfile = slurp_file($node_standby->logfile()); +ok($logfile =~ qr/could not find backup_label required for recovery/, + 'could not find backup_label required for recovery'); + +# Restore backup_label so recovery proceeds normally +rename("$data_dir/backup_label.tmp", "$data_dir/backup_label") + or BAIL_OUT "could not move $data_dir/backup_label"; + $node_standby->start; # Create some content on primary -- 2.34.1