From e7c2e425eba642c8e9c379c5fecc4bd5caf28997 Mon Sep 17 00:00:00 2001 From: "Sami Imseih (AWS)" Date: Tue, 22 Feb 2022 19:09:36 +0000 Subject: [PATCH 1/1] Fix "missing continuation record" after standby promotion Fix a condition where a recently promoted standby attempts to write an OVERWRITE_CONTRECORD with an LSN of the previously read aborted record. --- src/backend/access/transam/xlog.c | 16 ++- ...inuation-record-after-standby-promot.patch | 134 ++++++++++++++++++ 2 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 src/test/recovery/0001-Fix-missing-continuation-record-after-standby-promot.patch diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 0d2bd7a357..56c2fdec96 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5423,11 +5423,25 @@ StartupXLOG(void) * made it through and start writing after the portion that persisted. * (It's critical to first write an OVERWRITE_CONTRECORD message, which * we'll do as soon as we're open for writing new WAL.) + * + * If the last WAL record is ahead of the missing contrecord, this is + * a recently promoted primary and we should not write an overwrite + * contrecord. 
*/ if (!XLogRecPtrIsInvalid(missingContrecPtr)) { Assert(!XLogRecPtrIsInvalid(abortedRecPtr)); - EndOfLog = missingContrecPtr; + if (endOfRecoveryInfo->lastRec < missingContrecPtr) + { + elog(DEBUG2, "setting end of wal to missing continuation record %X/%X", + LSN_FORMAT_ARGS(missingContrecPtr)); + EndOfLog = missingContrecPtr; + } + else + { + elog(DEBUG2, "resetting aborted record"); + abortedRecPtr = InvalidXLogRecPtr; + } } /* diff --git a/src/test/recovery/0001-Fix-missing-continuation-record-after-standby-promot.patch b/src/test/recovery/0001-Fix-missing-continuation-record-after-standby-promot.patch new file mode 100644 index 0000000000..40d922801b --- /dev/null +++ b/src/test/recovery/0001-Fix-missing-continuation-record-after-standby-promot.patch @@ -0,0 +1,134 @@ +From cb344355facb3e6f793013b0b9998683277f3bd8 Mon Sep 17 00:00:00 2001 +From: "Sami Imseih (AWS)" +Date: Tue, 22 Feb 2022 18:59:44 +0000 +Subject: [PATCH 1/1] Fix "missing continuation record" after standby + promotion. + +Fix a condition where a recently promoted standby attempts to +write an OVERWRITE_RECORD with an LSN of the previously read +aborted record. +--- + .../t/029_overwrite_contrecord_promotion.pl | 111 ++++++++++++++++++ + 1 file changed, 111 insertions(+) + create mode 100644 src/test/recovery/t/029_overwrite_contrecord_promotion.pl + +diff --git a/src/test/recovery/t/029_overwrite_contrecord_promotion.pl b/src/test/recovery/t/029_overwrite_contrecord_promotion.pl +new file mode 100644 +index 0000000000..ea4ebb32c0 +--- /dev/null ++++ b/src/test/recovery/t/029_overwrite_contrecord_promotion.pl +@@ -0,0 +1,111 @@ ++# Copyright (c) 2021-2022, PostgreSQL Global Development Group ++ ++# Tests for resetting the "aborted record" after a promotion. 
++
++use strict;
++use warnings;
++
++use FindBin;
++use PostgreSQL::Test::Cluster;
++use PostgreSQL::Test::Utils;
++use Test::More;
++
++# Test: Create a physical replica that's missing the last WAL file,
++# then restart the primary to create a divergent WAL file and observe
++# that the replica resets the "aborted record" after a promotion.
++
++my $node = PostgreSQL::Test::Cluster->new('primary');
++$node->init(allows_streaming => 1);
++# We need these settings for stability of WAL behavior.
++# DEBUG2 is needed to see the log message this test looks for at the end.
++$node->append_conf(
++	'postgresql.conf', qq(
++autovacuum = off
++wal_keep_size = 1GB
++log_min_messages = DEBUG2
++));
++$node->start;
++
++$node->safe_psql('postgres', 'create table filler (a int, b text)');
++
++# Now consume all remaining room in the current WAL segment, leaving
++# space enough only for the start of a largish record.
++$node->safe_psql(
++	'postgres', q{
++DO $$
++DECLARE
++    wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size';
++    remain int;
++    iters int := 0;
++BEGIN
++    LOOP
++    INSERT into filler
++    select g, repeat(md5(g::text), (random() * 60 + 1)::int)
++    from generate_series(1, 10) g;
++
++    remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize;
++    IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN
++        RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain;
++        EXIT;
++    END IF;
++    iters := iters + 1;
++    END LOOP;
++END
++$$;
++});
++
++my $initfile = $node->safe_psql('postgres',
++	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
++$node->safe_psql('postgres',
++	qq{SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))}
++);
++my $endfile = $node->safe_psql('postgres',
++	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
++isnt($initfile, $endfile, "$initfile differs from $endfile");
++
++# Now stop abruptly, to avoid a stop checkpoint.  We can remove the tail file
++# afterwards, and on startup the large message should be overwritten with new
++# contents
++$node->stop('immediate');
++
++my $endfile_path = $node->basedir . "/pgdata/pg_wal/$endfile";
++unlink $endfile_path or die "could not unlink $endfile_path: $!";
++
++# OK, create a standby at this spot.
++$node->backup_fs_cold('backup');
++my $node_standby = PostgreSQL::Test::Cluster->new('standby');
++$node_standby->init_from_backup($node, 'backup', has_streaming => 1);
++
++$node_standby->start;
++$node->start;
++
++$node->safe_psql('postgres',
++	qq{create table foo (a text); insert into foo values ('hello')});
++$node->safe_psql('postgres',
++	qq{SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')});
++
++my $until_lsn = $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
++my $caughtup_query =
++  "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
++$node_standby->poll_query_until('postgres', $caughtup_query)
++  or die "Timed out while waiting for standby to catch up";
++
++is($node_standby->safe_psql('postgres', 'select * from foo'),
++	'hello', 'standby replays past overwritten contrecord');
++
++# Promote the standby.  Run pg_promote() through safe_psql rather than a bare
++# "psql" via system(), so the connection always targets the standby and a
++# failed promotion aborts the test instead of being silently ignored.
++$node_standby->safe_psql('postgres', 'select pg_promote()');
++
++# Verify message appears in standby's log
++my $log = slurp_file($node_standby->logfile);
++like(
++	$log,
++	qr[resetting aborted record],
++	"found log line in standby");
++
++$node->stop;
++$node_standby->stop;
++
++done_testing();
+-- 
+2.32.0
+
-- 
2.32.0