From ae10aaed56ba309bcb8a32d74a7f5fc9ea466c5c Mon Sep 17 00:00:00 2001 From: "Sami Imseih (AWS)" Date: Mon, 7 Mar 2022 19:43:50 +0000 Subject: [PATCH 1/1] Fix "missing continuation record" after standby promotion Invalidate abortedRecPtr and missingContrecPtr after a missing continuation record is skipped on a standby. This fixes a PANIC caused when a recently promoted standby attempts to write an OVERWRITE_RECORD with an LSN of the previously read aborted record. Author: Sami Imseih Reviewed-by: Kyotaro Horiguchi, Alvaro Herrera Discussion: https://postgr.es/m/44D259DE-7542-49C4-8A52-2AB01534DCA9@amazon.com Backpatch to 10 --- src/backend/access/transam/xlog.c | 4 ++++ src/test/recovery/t/026_overwrite_contrecord.pl | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c64febdb53..76fe62af33 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10260,6 +10260,10 @@ VerifyOverwriteContrecord(xl_overwrite_contrecord *xlrec, XLogReaderState *state (uint32) (state->overwrittenRecPtr >> 32), (uint32) state->overwrittenRecPtr); + /* We have safely skipped the aborted record */ + abortedRecPtr = InvalidXLogRecPtr; + missingContrecPtr = InvalidXLogRecPtr; + ereport(LOG, (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s", (uint32) (xlrec->overwritten_lsn >> 32), diff --git a/src/test/recovery/t/026_overwrite_contrecord.pl b/src/test/recovery/t/026_overwrite_contrecord.pl index 57b2a6b7fb..ef962b1271 100644 --- a/src/test/recovery/t/026_overwrite_contrecord.pl +++ b/src/test/recovery/t/026_overwrite_contrecord.pl @@ -15,7 +15,7 @@ plan tests => 3; # Test: Create a physical replica that's missing the last WAL file, # then restart the primary to create a divergent WAL file and observe # that the replica replays the "overwrite contrecord" from that new -# file. +# file and the standby promotes successfully. 
my $node = PostgresNode->get_new_node('primary'); $node->init(allows_streaming => 1); @@ -105,5 +105,11 @@ like( qr[successfully skipped missing contrecord at], "found log line in standby"); +# Verify promotion is successful; die if pg_ctl reports failure +$ENV{PGDATA} = $node_standby->data_dir; +$ENV{PGPORT} = $node_standby->port; +$ENV{PGHOST} = $node_standby->host; +system("pg_ctl", "promote") == 0 or die "pg_ctl promote failed: $?"; + $node->stop; $node_standby->stop; -- 2.32.0