From 43888869846b6de00fbddb215300a8ff774bbc04 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Date: Tue, 8 Feb 2022 16:42:53 +0900
Subject: [PATCH v1] Get rid of unused path to handle concurrent checkpoints

CreateRestartPoint considered the case a concurrent checkpoint is
running. But after 7ff23c6d27 we no longer launch multiple checkpoints
simultaneously.  That code path, if it is passed, may leave
unrecoverable database by removing WAL segments that are required by
the last established restartpoint.
---
 src/backend/access/transam/xlog.c | 53 +++++++++++++++++++------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 958220c495..01a345e8bd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9752,29 +9752,30 @@ CreateRestartPoint(int flags)
 	PriorRedoPtr = ControlFile->checkPointCopy.redo;
 
 	/*
-	 * Update pg_control, using current time.  Check that it still shows
-	 * DB_IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
-	 * this is a quick hack to make sure nothing really bad happens if somehow
-	 * we get here after the end-of-recovery checkpoint.
+	 * Update pg_control, using current time.
 	 */
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
-		ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
+
+	/* We mustn't have a concurrent checkpoint that advances checkpoint LSN */
+	Assert(lastCheckPoint.redo > ControlFile->checkPointCopy.redo);
+
+	ControlFile->checkPoint = lastCheckPointRecPtr;
+	ControlFile->checkPointCopy = lastCheckPoint;
+
+	/*
+	 * Ensure minRecoveryPoint is past the checkpoint record while archive
+	 * recovery is still ongoing.  Normally, this will have happened already
+	 * while writing out dirty buffers, but not necessarily - e.g. because no
+	 * buffers were dirtied.  We do this because a non-exclusive base backup
+	 * uses minRecoveryPoint to determine which WAL files must be included in
+	 * the backup, and the file (or files) containing the checkpoint record
+	 * must be included, at a minimum. Note that for an ordinary restart of
+	 * recovery there's no value in having the minimum recovery point any
+	 * earlier than this anyway, because redo will begin just after the
+	 * checkpoint record.
+	 */
+	if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
 	{
-		ControlFile->checkPoint = lastCheckPointRecPtr;
-		ControlFile->checkPointCopy = lastCheckPoint;
-
-		/*
-		 * Ensure minRecoveryPoint is past the checkpoint record.  Normally,
-		 * this will have happened already while writing out dirty buffers,
-		 * but not necessarily - e.g. because no buffers were dirtied.  We do
-		 * this because a non-exclusive base backup uses minRecoveryPoint to
-		 * determine which WAL files must be included in the backup, and the
-		 * file (or files) containing the checkpoint record must be included,
-		 * at a minimum. Note that for an ordinary restart of recovery there's
-		 * no value in having the minimum recovery point any earlier than this
-		 * anyway, because redo will begin just after the checkpoint record.
-		 */
 		if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
 		{
 			ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
@@ -9786,8 +9787,18 @@ CreateRestartPoint(int flags)
 		}
 		if (flags & CHECKPOINT_IS_SHUTDOWN)
 			ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
-		UpdateControlFile();
 	}
+	else
+	{
+		/*
+		 * We have exited from archive-recovery mode after this restartpoint
+		 * started. Crash recovery ever after should always recover to the end
+		 * of WAL.
+		 */
+		ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
+		ControlFile->minRecoveryPointTLI = 0;
+	}
+	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
 	/*
-- 
2.27.0