From b4b249842df0f0550dee3722890eded7568c0c2e Mon Sep 17 00:00:00 2001 From: Osumi Takamichi Date: Thu, 28 Jan 2021 00:16:01 +0000 Subject: [PATCH v01] trace wal_level change for backup management This patch adds a new parameter to the control file in order to trace a drop of wal_level to minimal from a higher level. The purpose is to expose the LSN at which the wal_level drop happened and to make it possible to notify users of the change. The value is reset when pg_basebackup creates a new base backup successfully. Author: Takamichi Osumi Discussion: https://www.postgresql.org/message-id/OSBPR01MB4888B94DF1C9325033C26D00EDD00%40OSBPR01MB4888.jpnprd01.prod.outlook.com --- src/backend/access/transam/xlog.c | 35 +++++++++++++++ src/backend/replication/basebackup.c | 3 ++ src/bin/pg_basebackup/t/010_pg_basebackup.pl | 64 +++++++++++++++++++++++++++- src/bin/pg_controldata/pg_controldata.c | 3 ++ src/bin/pg_resetwal/pg_resetwal.c | 6 ++- src/include/access/xlog.h | 1 + src/include/catalog/pg_control.h | 7 +++ 7 files changed, 117 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index cc007b8..990c93f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -772,6 +772,16 @@ static ControlFileData *ControlFile = NULL; */ #define ConvertToXSegs(x, segsize) XLogMBVarToSegs((x), (segsize)) +/* + * Return true if this is the first wal_level drop + * that could cause discontinuity of snapshots + * since the latest base backup was taken. + */ +#define TestWalLevelGap() \ + (ControlFile->wal_level_drop == InvalidXLogRecPtr && \ + ControlFile->wal_level >= WAL_LEVEL_REPLICA && \ + wal_level < WAL_LEVEL_REPLICA) + /* The number of bytes in a WAL segment usable for WAL data. 
*/ static int UsableBytesInSegment; @@ -4629,6 +4639,7 @@ InitControlFile(uint64 sysidentifier) ControlFile->max_locks_per_xact = max_locks_per_xact; ControlFile->wal_level = wal_level; ControlFile->wal_log_hints = wal_log_hints; + ControlFile->wal_level_drop = InvalidXLogRecPtr; ControlFile->track_commit_timestamp = track_commit_timestamp; ControlFile->data_checksum_version = bootstrap_data_checksum_version; } @@ -4931,6 +4942,19 @@ UpdateControlFile(void) } /* + * Successful execution of pg_basebackup + * needs to reset wal_level_drop in the control file. + */ +void +ResetWalLevelDrop(void) +{ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->wal_level_drop = InvalidXLogRecPtr; + UpdateControlFile(); + LWLockRelease(ControlFileLock); +} + +/* * Returns the unique system identifier from control file. */ uint64 @@ -9847,6 +9871,11 @@ XLogRestorePoint(const char *rpName) static void XLogReportParameters(void) { + /* + * flag to know whether or not wal_level dropped to minimal from higher level + */ + bool cause_discontinuity = false; + if (wal_level != ControlFile->wal_level || wal_log_hints != ControlFile->wal_log_hints || MaxConnections != ControlFile->MaxConnections || @@ -9876,6 +9905,8 @@ XLogReportParameters(void) xlrec.wal_level = wal_level; xlrec.wal_log_hints = wal_log_hints; xlrec.track_commit_timestamp = track_commit_timestamp; + if (TestWalLevelGap()) + cause_discontinuity = true; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); @@ -9894,6 +9925,8 @@ XLogReportParameters(void) ControlFile->wal_level = wal_level; ControlFile->wal_log_hints = wal_log_hints; ControlFile->track_commit_timestamp = track_commit_timestamp; + if (cause_discontinuity) + ControlFile->wal_level_drop = GetXLogInsertRecPtr(); UpdateControlFile(); LWLockRelease(ControlFileLock); @@ -10322,6 +10355,8 @@ xlog_redo(XLogReaderState *record) ControlFile->minRecoveryPoint = lsn; ControlFile->minRecoveryPointTLI = ThisTimeLineID; } + if (TestWalLevelGap()) + 
ControlFile->wal_level_drop = GetXLogInsertRecPtr(); CommitTsParameterChange(xlrec.track_commit_timestamp, ControlFile->track_commit_timestamp); diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 0f54635..3fd0041 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -739,6 +739,9 @@ perform_base_backup(basebackup_options *opt) /* clean up the resource owner we created */ WalSndResourceCleanup(true); + /* reset wal_level_drop, because the gap of wal_level has been restored */ + ResetWalLevelDrop(); + pgstat_progress_end_command(); } diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index f674a7c..35540e6 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 109; +use Test::More tests => 118; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -575,3 +575,65 @@ rmtree("$tempdir/backup_corrupt4"); $node->safe_psql('postgres', "DROP TABLE corrupt1;"); $node->safe_psql('postgres', "DROP TABLE corrupt2;"); + +# confirm pg_basebackup works together with the control +# file's parameter to trace wal_level drop. 
+my $another_node = get_new_node('wal_level'); +my $backup_name = 'my_backup'; + +$another_node->init(has_archiving => 1); +$another_node->append_conf('postgresql.conf', 'wal_level = replica'); +$another_node->start; + +my $data = $another_node->data_dir; +command_like( + [ 'pg_controldata', $data ], + qr/Oldest location of wal_level change to minimal:.*0\/0/, + 'check the initial value is equal to 0/0'); +$another_node->stop; + +$another_node->append_conf('postgresql.conf', + 'wal_level = logical'); +$another_node->start; +command_like( + [ 'pg_controldata', $data ], + qr/Oldest location of wal_level change to minimal:.*0\/0/, + 'raising the wal_level from raplica to logical does not change initial value'); +$another_node->stop; + +$another_node->append_conf('postgresql.conf',q[ +wal_level = minimal +max_wal_senders = 0 +archive_mode = off +]); +$another_node->start; +$another_node->stop; + +my ($stdout, $stderr) = run_command([ 'pg_controldata', $data ]); +my @control_data = split("\n", $stdout); +my $wal_level_drop_lsn = undef; +foreach (@control_data) +{ + if ($_ =~ /^Oldest location of wal_level change to minimal:.*\/(.*)/) + { + $wal_level_drop_lsn = $1; + chomp($wal_level_drop_lsn); + last; + } +} +die unless defined $wal_level_drop_lsn; +die if $wal_level_drop_lsn eq '0'; + +$another_node->append_conf('postgresql.conf', q[ +wal_level = replica +max_wal_senders = 10 +]); + +$another_node->start; +$another_node->backup($backup_name); +$another_node->stop; + +command_like( + [ 'pg_controldata', $data ], + qr/Oldest location of wal_level change to minimal:.*0\/0/, + 'the parameter has been changed to the initial value'); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 3e00ac0..65ba290 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -334,5 +334,8 @@ main(int argc, char *argv[]) ControlFile->data_checksum_version); printf(_("Mock authentication nonce: 
%s\n"), mock_auth_nonce_str); + printf(_("Oldest location of wal_level change to minimal: %X/%X\n"), + (uint32) (ControlFile->wal_level_drop >> 32), + (uint32) ControlFile->wal_level_drop); return 0; } diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 805dafe..a8555f4 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -701,7 +701,10 @@ GuessControlValues(void) ControlFile.checkPoint = ControlFile.checkPointCopy.redo; ControlFile.unloggedLSN = FirstNormalUnloggedLSN; - /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */ + /* + * minRecoveryPoint, backupStartPoint, backupEndPoint and wal_level_drop + * can be left zero + */ ControlFile.wal_level = WAL_LEVEL_MINIMAL; ControlFile.wal_log_hints = false; @@ -901,6 +904,7 @@ RewriteControlFile(void) ControlFile.backupStartPoint = 0; ControlFile.backupEndPoint = 0; ControlFile.backupEndRequired = false; + ControlFile.wal_level_drop = 0; /* * Force the defaults for max_* settings. The values don't really matter diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 75ec107..c5e18af 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -316,6 +316,7 @@ extern TimestampTz GetLatestXTime(void); extern TimestampTz GetCurrentChunkReplayStartTime(void); extern void UpdateControlFile(void); +extern void ResetWalLevelDrop(void); extern uint64 GetSystemIdentifier(void); extern char *GetMockAuthenticationNonce(void); extern bool DataChecksumsEnabled(void); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index e3f4815..d4c5ccb 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -226,6 +226,13 @@ typedef struct ControlFileData */ char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; + /* + * This indicates LSN just after the oldest wal_level change to lower than + * wal_level=replica because it causes discontinuity of snapshots. 
Reset + * to InvalidXLogRecPtr once pg_basebackup completes a new base backup. + */ + XLogRecPtr wal_level_drop; + /* CRC of all above ... MUST BE LAST! */ pg_crc32c crc; } ControlFileData; -- 2.2.0