From 17fbdb43c2d98ad4f3946f37a4d9f95ab38a2d2e Mon Sep 17 00:00:00 2001 From: "Sami Imseih (AWS)" Date: Fri, 25 Feb 2022 04:17:17 +0000 Subject: [PATCH 1/1] [PATCH v3] Fix "missing continuation record" after standby promotion Fix a condition where a recently promoted standby attempts to write an OVERWRITE_RECORD with an LSN of the previously read aborted record. Author: Sami Imseih Discussion: https://postgr.es/m/44D259DE-7542-49C4-8A52-2AB01534DCA9@amazon.com --- src/backend/access/transam/xlog.c | 15 ++- .../t/029_overwrite_contrecord_promotion.pl | 111 ++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 src/test/recovery/t/029_overwrite_contrecord_promotion.pl diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 0d2bd7a357..ae4dcba8df 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5427,7 +5427,17 @@ StartupXLOG(void) if (!XLogRecPtrIsInvalid(missingContrecPtr)) { Assert(!XLogRecPtrIsInvalid(abortedRecPtr)); - EndOfLog = missingContrecPtr; + if (endOfRecoveryInfo->lastRec < missingContrecPtr) + { + elog(DEBUG2, "setting end of wal to missing continuation record %X/%X", + LSN_FORMAT_ARGS(missingContrecPtr)); + EndOfLog = missingContrecPtr; + } + else + { + elog(DEBUG2, "resetting aborted record"); + abortedRecPtr = InvalidXLogRecPtr; + } } /* @@ -6793,6 +6803,9 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, XLogFlush(recptr); + elog(DEBUG2, "OVERWRITE_CONTRECORD inserted at %X/%X for aborted record %X/%X", + LSN_FORMAT_ARGS(ProcLastRecPtr), LSN_FORMAT_ARGS(xlrec.overwritten_lsn)); + END_CRIT_SECTION(); return recptr; diff --git a/src/test/recovery/t/029_overwrite_contrecord_promotion.pl b/src/test/recovery/t/029_overwrite_contrecord_promotion.pl new file mode 100644 index 0000000000..e3511e89c1 --- /dev/null +++ b/src/test/recovery/t/029_overwrite_contrecord_promotion.pl @@ -0,0 +1,111 @@ +# Copyright (c) 2021-2022, 
PostgreSQL Global Development Group
+
+# Tests for resetting the "aborted record" after a promotion.
+
+use strict;
+use warnings;
+
+use FindBin;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Test: Create a physical replica that's missing the last WAL file,
+# then restart the primary to create a divergent WAL file and observe
+# that the replica resets the "aborted record" after a promotion.
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+# We need these settings for stability of WAL behavior.  DEBUG2 logging is
+# needed because the test looks for a DEBUG2 message in the server log.
+$node->append_conf(
+	'postgresql.conf', qq(
+autovacuum = off
+wal_keep_size = 1GB
+log_min_messages = DEBUG2
+));
+$node->start;
+
+$node->safe_psql('postgres', 'create table filler (a int, b text)');
+
+# Now consume all remaining room in the current WAL segment, leaving
+# space enough only for the start of a largish record.
+$node->safe_psql(
+	'postgres', q{
+DO $$
+DECLARE
+    wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size';
+    remain int;
+    iters  int := 0;
+BEGIN
+    LOOP
+    INSERT into filler
+    select g, repeat(md5(g::text), (random() * 60 + 1)::int)
+    from generate_series(1, 10) g;
+
+    remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize;
+    IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN
+        RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain;
+        EXIT;
+    END IF;
+    iters := iters + 1;
+    END LOOP;
+END
+$$;
+});
+
+# Note the current WAL file, then emit a record too large to fit in it.
+my $initfile = $node->safe_psql('postgres',
+	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+$node->safe_psql('postgres',
+	qq{SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))}
+);
+# The message must have moved the insert position into the next WAL file.
+my $endfile = $node->safe_psql('postgres',
+	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+isnt($initfile, $endfile, "$initfile differs from $endfile");
+
+# Now stop abruptly, to avoid a stop checkpoint.  We can remove the tail file
+# afterwards, and on startup the large message should be overwritten with new
+# contents
+$node->stop('immediate');
+
+unlink $node->basedir . "/pgdata/pg_wal/$endfile"
+  or die "could not unlink " . $node->basedir . "/pgdata/pg_wal/$endfile: $!";
+
+# OK, create a standby at this spot.
+$node->backup_fs_cold('backup');
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node, 'backup', has_streaming => 1);
+
+$node_standby->start;
+$node->start;
+
+# Generate new WAL on the primary and wait until the standby has replayed it.
+$node->safe_psql('postgres',
+	qq{create table foo (a text); insert into foo values ('hello')});
+$node->safe_psql('postgres',
+	qq{SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')});
+
+my $until_lsn = $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+my $caughtup_query =
+  "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+  or die "Timed out while waiting for standby to catch up";
+
+is($node_standby->safe_psql('postgres', 'select * from foo'),
+	'hello', 'standby replays past overwritten contrecord');
+
+# Promote the standby.  Using safe_psql makes the request go to the standby
+# itself and aborts the test if it fails.  pg_promote() waits for the
+# promotion to complete by default.
+$node_standby->safe_psql('postgres', 'select pg_promote()');
+
+# Verify that the standby logged the reset of the aborted record.
+my $log = slurp_file($node_standby->logfile);
+like($log, qr[resetting aborted record], "found log line in standby");
+
+$node->stop;
+$node_standby->stop;
+
+done_testing();
-- 
2.32.0