From 94e695031e7aa78670b1d0fd63f6cfed0d501611 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Date: Fri, 14 May 2021 13:51:01 +0900
Subject: [PATCH v2] Set expectedTLEs correctly based on recoveryTargetTLI

When a standby starts streaming before it determines expectedTLEs, the
variable is set based on the timeline of the WAL segment just streamed
from the primary.  If we fetch the last checkpoint from a segment on an
older timeline, this behavior leads to setting expectedTLEs based on a
timeline older than recoveryTargetTLI; later calls to
rescanLatestTimeLine then no longer update recoveryTargetTLI and
expectedTLEs, and the standby stays frozen at that point.

This behavior was introduced by commit ee994272ca, but there is no
explanation for why it breaks the defined relationship between the two
variables, and there seems to be no case in which the behavior is useful.

Make things consistent by fixing WaitForWALToBecomeAvailable to set
expectedTLEs based on recoveryTargetTLI instead of receiveTLI.
---
 src/backend/access/transam/xlog.c          |  3 +-
 src/test/recovery/t/004_timeline_switch.pl | 72 +++++++++++++++++++++-
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8d163f190f..ef8b6d2c8d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12662,7 +12662,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						if (readFile < 0)
 						{
 							if (!expectedTLEs)
-								expectedTLEs = readTimeLineHistory(receiveTLI);
+								expectedTLEs =
+									readTimeLineHistory(recoveryTargetTLI);
 							readFile = XLogFileRead(readSegNo, PANIC,
 													receiveTLI,
 													XLOG_FROM_STREAM, false);
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index c101980e9e..2c8b73f5f0 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -7,7 +7,7 @@ use warnings;
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 3;
+use Test::More tests => 4;
 
 $ENV{PGDATABASE} = 'postgres';
 
@@ -109,3 +109,73 @@ $node_primary_2->wait_for_catchup($node_standby_3, 'replay',
 my $result_2 =
   $node_standby_3->safe_psql('postgres', "SELECT count(*) FROM tab_int");
 is($result_2, qq(1), 'check content of standby 3');
+
+######
+# Check that recoveryTargetTLI is correctly updated when we start
+# replication from a checkpoint on the previous timeline of a
+# just-promoted primary, after pg_wal has been wiped out.
+
+# setup two standby nodes, one of them archives WAL files
+$node_primary_2->psql('postgres', 'CREATE TABLE t (a int)');
+my $node_standby_4 = get_new_node('standby_4');
+$node_standby_4->init_from_backup($node_primary_2, $backup_name,
+	has_streaming => 1,	allows_streaming => 1, has_archiving => 1);
+
+$node_standby_4->enable_archiving;
+my $archdir = $node_standby_4->archive_dir;
+
+# keep segments, and enable archive on the standby
+$node_standby_4->append_conf('postgresql.conf', qq[
+wal_keep_size = 512MB
+archive_mode = always
+]);
+
+$node_standby_4->start;
+
+my $node_standby_5 = get_new_node('node_standby_5');
+$node_standby_5->init_from_backup($node_primary_2, $backup_name,
+	has_streaming => 1);
+$node_standby_5->start;
+
+$node_primary_2->psql('postgres', qq[
+					  SELECT pg_switch_wal();
+					  INSERT INTO t VALUES (0);
+					  CHECKPOINT;
+]);
+$node_primary_2->wait_for_catchup($node_standby_4, 'write',
+								$node_primary_2->lsn('insert'));
+$node_primary_2->wait_for_catchup($node_standby_5, 'write',
+								$node_primary_2->lsn('insert'));
+
+# disconnect the second standby then connect to the promoted first standby
+$node_standby_5->stop;
+$node_standby_4->psql('postgres', 'SELECT pg_promote()');
+$node_standby_5->enable_streaming($node_standby_4);
+$node_standby_5->append_conf('postgresql.conf',
+							 "restore_command = 'cp $archdir/%f %p'");
+
+# wipe-out pg_wal on the second standby (new standby)
+my $pgwaldir = $node_standby_5->data_dir. "/pg_wal";
+opendir my $dh, $pgwaldir or die "failed to open $pgwaldir of node_standby_5";
+while (my $f = readdir($dh))
+{
+	unlink("$pgwaldir/$f") if (-f "$pgwaldir/$f");
+}
+closedir($dh);
+
+# restart (not just reload) the second standby, to trigger archive recovery
+$node_standby_5->start;
+$node_standby_4->psql('postgres', qq[
+						INSERT INTO t VALUES(0);
+						SELECT pg_switch_wal();
+						INSERT INTO t VALUES(0); -- avoid segment border
+]);
+
+$node_standby_4->wait_for_catchup($node_standby_5, 'write',
+									$node_standby_4->lsn('insert'));
+
+# check if the change is correctly replicated
+my $result_3 =
+  $node_standby_4->safe_psql('postgres',
+							   'SELECT pg_current_wal_insert_lsn() = write_lsn FROM pg_stat_replication');
+is($result_3, 't', 'check receiving historic timeline from primary');
-- 
2.27.0

