From b823bc3086f80e869057ccef0c6e8a3f7c664a9a Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Thu, 17 Jul 2025 16:39:36 +0530 Subject: [PATCH v6 8/8] pg_verifybackup: enabled WAL parsing for tar-format backup Now that pg_waldump supports decoding from tar archives, we should leverage this functionality to remove the previous restriction on WAL parsing for tar-backed formats. --- doc/src/sgml/ref/pg_verifybackup.sgml | 5 +- src/bin/pg_verifybackup/pg_verifybackup.c | 66 +++++++++++++------ src/bin/pg_verifybackup/t/002_algorithm.pl | 4 -- src/bin/pg_verifybackup/t/003_corruption.pl | 4 +- src/bin/pg_verifybackup/t/008_untar.pl | 5 +- src/bin/pg_verifybackup/t/010_client_untar.pl | 5 +- 6 files changed, 50 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/ref/pg_verifybackup.sgml b/doc/src/sgml/ref/pg_verifybackup.sgml index e9b8bfd51b1..16b50b5a4df 100644 --- a/doc/src/sgml/ref/pg_verifybackup.sgml +++ b/doc/src/sgml/ref/pg_verifybackup.sgml @@ -36,10 +36,7 @@ PostgreSQL documentation backup_manifest generated by the server at the time of the backup. The backup may be stored either in the "plain" or the "tar" format; this includes tar-format backups compressed with any algorithm - supported by pg_basebackup. However, at present, - WAL verification is supported only for plain-format - backups. Therefore, if the backup is stored in tar-format, the - -n, --no-parse-wal option should be used. + supported by pg_basebackup. diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 9fcd6be004e..6915fc7f28e 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -74,7 +74,9 @@ pg_noreturn static void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2, 3); -static void verify_tar_backup(verifier_context *context, DIR *dir); +static void verify_tar_backup(verifier_context *context, DIR *dir, + char **base_archive_path, + char **wal_archive_path); static void verify_plain_backup_directory(verifier_context *context, char *relpath, char *fullpath, DIR *dir); @@ -83,7 +85,9 @@ static void verify_plain_backup_file(verifier_context *context, char *relpath, static void verify_control_file(const char *controlpath, uint64 manifest_system_identifier); static void precheck_tar_backup_file(verifier_context *context, char *relpath, - char *fullpath, SimplePtrList *tarfiles); + char *fullpath, SimplePtrList *tarfiles, + char **base_archive_path, + char **wal_archive_path); static void verify_tar_file(verifier_context *context, char *relpath, char *fullpath, astreamer *streamer); static void report_extra_backup_files(verifier_context *context); @@ -136,6 +140,8 @@ main(int argc, char **argv) bool no_parse_wal = false; bool quiet = false; char *wal_path = NULL; + char *base_archive_path = NULL; + char *wal_archive_path = NULL; char *pg_waldump_path = NULL; DIR *dir; @@ -327,17 +333,6 @@ main(int argc, char **argv) pfree(path); } - /* - * XXX: In the future, we should consider enhancing pg_waldump to read WAL - * files from an archive. - */ - if (!no_parse_wal && context.format == 't') - { - pg_log_error("pg_waldump cannot read tar files"); - pg_log_error_hint("You must use -n/--no-parse-wal when verifying a tar-format backup."); - exit(1); - } - /* * Perform the appropriate type of verification appropriate based on the * backup format. This will close 'dir'. @@ -346,7 +341,7 @@ main(int argc, char **argv) verify_plain_backup_directory(&context, NULL, context.backup_directory, dir); else - verify_tar_backup(&context, dir); + verify_tar_backup(&context, dir, &base_archive_path, &wal_archive_path); /* * The "matched" flag should now be set on every entry in the hash table. @@ -364,9 +359,28 @@ main(int argc, char **argv) if (context.format == 'p' && !context.skip_checksums) verify_backup_checksums(&context); - /* By default, look for the WAL in the backup directory, too. */ + /* + * By default, WAL files are expected to be found in the backup directory + * for plain-format backups. In the case of tar-format backups, if a + * separate WAL archive is not found, the WAL files are most likely + * included within the main data directory archive. + */ if (wal_path == NULL) - wal_path = psprintf("%s/pg_wal", context.backup_directory); + { + if (context.format == 'p') + wal_path = psprintf("%s/pg_wal", context.backup_directory); + else if (wal_archive_path) + wal_path = wal_archive_path; + else if (base_archive_path) + wal_path = base_archive_path; + else + { + pg_log_error("wal archive not found"); + pg_log_error_hint("Specify the correct path using the option -w/--wal-path." + "Or you must use -n/--no-parse-wal when verifying a tar-format backup."); + exit(1); + } + } /* * Try to parse the required ranges of WAL records, unless we were told @@ -787,7 +801,8 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier) * close when we're done with it. */ static void -verify_tar_backup(verifier_context *context, DIR *dir) +verify_tar_backup(verifier_context *context, DIR *dir, char **base_archive_path, + char **wal_archive_path) { struct dirent *dirent; SimplePtrList tarfiles = {NULL, NULL}; @@ -816,7 +831,8 @@ verify_tar_backup(verifier_context *context, DIR *dir) char *fullpath; fullpath = psprintf("%s/%s", context->backup_directory, filename); - precheck_tar_backup_file(context, filename, fullpath, &tarfiles); + precheck_tar_backup_file(context, filename, fullpath, &tarfiles, + base_archive_path, wal_archive_path); pfree(fullpath); } } @@ -875,11 +891,13 @@ verify_tar_backup(verifier_context *context, DIR *dir) * * The arguments to this function are mostly the same as the * verify_plain_backup_file. The additional argument outputs a list of valid - * tar files. + * tar files, along with the full paths to the main archive and the WAL + * directory archive. */ static void precheck_tar_backup_file(verifier_context *context, char *relpath, - char *fullpath, SimplePtrList *tarfiles) + char *fullpath, SimplePtrList *tarfiles, + char **base_archive_path, char **wal_archive_path) { struct stat sb; Oid tblspc_oid = InvalidOid; @@ -918,9 +936,17 @@ precheck_tar_backup_file(verifier_context *context, char *relpath, * extension such as .gz, .lz4, or .zst. */ if (strncmp("base", relpath, 4) == 0) + { suffix = relpath + 4; + + *base_archive_path = pstrdup(fullpath); + } else if (strncmp("pg_wal", relpath, 6) == 0) + { suffix = relpath + 6; + + *wal_archive_path = pstrdup(fullpath); + } else { /* Expected a .tar file here. */ diff --git a/src/bin/pg_verifybackup/t/002_algorithm.pl b/src/bin/pg_verifybackup/t/002_algorithm.pl index ae16c11bc4d..4f284a9e828 100644 --- a/src/bin/pg_verifybackup/t/002_algorithm.pl +++ b/src/bin/pg_verifybackup/t/002_algorithm.pl @@ -30,10 +30,6 @@ sub test_checksums { # Add switch to get a tar-format backup push @backup, ('--format' => 'tar'); - - # Add switch to skip WAL verification, which is not yet supported for - # tar-format backups - push @verify, ('--no-parse-wal'); } # A backup with a bogus algorithm should fail. diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl index 1dd60f709cf..f1ebdbb46b4 100644 --- a/src/bin/pg_verifybackup/t/003_corruption.pl +++ b/src/bin/pg_verifybackup/t/003_corruption.pl @@ -193,10 +193,8 @@ for my $scenario (@scenario) command_ok([ $tar, '-cf' => "$tar_backup_path/base.tar", '.' ]); chdir($cwd) || die "chdir: $!"; - # Now check that the backup no longer verifies. We must use -n - # here, because pg_waldump can't yet read WAL from a tarfile. command_fails_like( - [ 'pg_verifybackup', '--no-parse-wal', $tar_backup_path ], + [ 'pg_verifybackup', $tar_backup_path ], $scenario->{'fails_like'}, "corrupt backup fails verification: $name"); diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl index bc3d6b352ad..09079a94fee 100644 --- a/src/bin/pg_verifybackup/t/008_untar.pl +++ b/src/bin/pg_verifybackup/t/008_untar.pl @@ -47,7 +47,6 @@ my $tsoid = $primary->safe_psql( SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1')); my $backup_path = $primary->backup_dir . '/server-backup'; -my $extract_path = $primary->backup_dir . '/extracted-backup'; my @test_configuration = ( { @@ -123,14 +122,12 @@ for my $tc (@test_configuration) # Verify tar backup. $primary->command_ok( [ - 'pg_verifybackup', '--no-parse-wal', - '--exit-on-error', $backup_path, + 'pg_verifybackup', '--exit-on-error', $backup_path, ], "verify backup, compression $method"); # Cleanup. rmtree($backup_path); - rmtree($extract_path); } } diff --git a/src/bin/pg_verifybackup/t/010_client_untar.pl b/src/bin/pg_verifybackup/t/010_client_untar.pl index b62faeb5acf..5b0e76ee69d 100644 --- a/src/bin/pg_verifybackup/t/010_client_untar.pl +++ b/src/bin/pg_verifybackup/t/010_client_untar.pl @@ -32,7 +32,6 @@ print $jf $junk_data; close $jf; my $backup_path = $primary->backup_dir . '/client-backup'; -my $extract_path = $primary->backup_dir . '/extracted-backup'; my @test_configuration = ( { @@ -137,13 +136,11 @@ for my $tc (@test_configuration) # Verify tar backup. $primary->command_ok( [ - 'pg_verifybackup', '--no-parse-wal', - '--exit-on-error', $backup_path, + 'pg_verifybackup', '--exit-on-error', $backup_path, ], "verify backup, compression $method"); # Cleanup. - rmtree($extract_path); rmtree($backup_path); } } -- 2.47.1