From 834235add60df8203c243fa30e78cfa62619076b Mon Sep 17 00:00:00 2001
From: Jehan-Guillaume de Rorthais <jgdr@dalibo.com>
Date: Wed, 1 Apr 2020 16:36:06 +0200
Subject: [PATCH 1/2] Fix WAL retention during production crash recovery

During crash recovery of a production cluster with archive_mode=on,
XLogArchiveCheckDone() was considering the cluster as inRecovery
without archive_mode=always. Because of this, non-archived WAL and
related .ready files were recycled or removed.
---
 src/backend/access/transam/xlog.c        | 25 +++++++++++++++++-------
 src/backend/access/transam/xlogarchive.c |  7 ++++---
 src/include/access/xlog.h                |  9 +++++++++
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 977d448f50..55d06a8704 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -219,8 +219,8 @@ static TimeLineID receiveTLI = 0;
 static bool lastFullPageWrites;
 
 /*
- * Local copy of SharedRecoveryInProgress variable. True actually means "not
- * known, need to check the shared state".
+ * Local cache of "SharedRecoveryInProgress != NOT_IN_RECOVERY".  True
+ * actually means "not known, need to check the shared state".
  */
 static bool LocalRecoveryInProgress = true;
 
@@ -653,10 +653,10 @@ typedef struct XLogCtlData
 	TimeLineID	PrevTimeLineID;
 
 	/*
-	 * SharedRecoveryInProgress indicates if we're still in crash or archive
+	 * SharedRecoveryInProgress indicates if we're either in crash or archive
 	 * recovery.  Protected by info_lck.
 	 */
-	bool		SharedRecoveryInProgress;
+	RecoveryState	SharedRecoveryInProgress;
 
 	/*
 	 * SharedHotStandbyActive indicates if we allow hot standby queries to be
@@ -5131,7 +5131,7 @@ XLOGShmemInit(void)
 	 * in additional info.)
 	 */
 	XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
-	XLogCtl->SharedRecoveryInProgress = true;
+	XLogCtl->SharedRecoveryInProgress = IN_ARCHIVE_RECOVERY;
 	XLogCtl->SharedHotStandbyActive = false;
 	XLogCtl->SharedPromoteIsTriggered = false;
 	XLogCtl->WalWriterSleeping = false;
@@ -6901,6 +6901,9 @@ StartupXLOG(void)
 								ControlFile->checkPointCopy.ThisTimeLineID,
 								recoveryTargetTLI)));
 			ControlFile->state = DB_IN_CRASH_RECOVERY;
+			SpinLockAcquire(&XLogCtl->info_lck);
+			XLogCtl->SharedRecoveryInProgress = IN_CRASH_RECOVERY;
+			SpinLockRelease(&XLogCtl->info_lck);
 		}
 		ControlFile->checkPoint = checkPointLoc;
 		ControlFile->checkPointCopy = checkPoint;
@@ -7928,7 +7931,7 @@ StartupXLOG(void)
 	ControlFile->time = (pg_time_t) time(NULL);
 
 	SpinLockAcquire(&XLogCtl->info_lck);
-	XLogCtl->SharedRecoveryInProgress = false;
+	XLogCtl->SharedRecoveryInProgress = NOT_IN_RECOVERY;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	UpdateControlFile();
@@ -8074,7 +8077,7 @@ RecoveryInProgress(void)
 		 */
 		volatile XLogCtlData *xlogctl = XLogCtl;
 
-		LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
+		LocalRecoveryInProgress = (xlogctl->SharedRecoveryInProgress != NOT_IN_RECOVERY);
 
 		/*
 		 * Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -8103,6 +8106,25 @@ RecoveryInProgress(void)
 	}
 }
 
+/*
+ * Returns the current recovery state as stored in shared memory.
+ *
+ * Unlike RecoveryInProgress(), the result is not cached in a local
+ * variable and it distinguishes crash recovery from archive recovery.
+ * The shared field is protected by info_lck, so read it under the lock.
+ */
+RecoveryState
+GetRecoveryState(void)
+{
+	RecoveryState retval;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	retval = XLogCtl->SharedRecoveryInProgress;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return retval;
+}
+
 /*
  * Is HotStandby active yet? This is only important in special backends
  * since normal backends won't ever be able to connect until this returns
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index d62c12310a..25666cdbcf 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -572,7 +572,7 @@ XLogArchiveCheckDone(const char *xlog)
 {
 	char		archiveStatusPath[MAXPGPATH];
 	struct stat stat_buf;
-	bool		inRecovery = RecoveryInProgress();
+	RecoveryState	inRecoveryState = GetRecoveryState();
 
 	/*
 	 * The file is always deletable if archive_mode is "off".  On standbys
@@ -580,8 +580,9 @@ XLogArchiveCheckDone(const char *xlog)
 	 * "always".  On a primary, archiving is enabled if archive_mode is "on"
 	 * or "always".
 	 */
-	if (!((XLogArchivingActive() && !inRecovery) ||
-		  (XLogArchivingAlways() && inRecovery)))
+	/* Crash recovery falls through when archiving is on: keep the WAL. */
+	if (!XLogArchivingActive() ||
+		(inRecoveryState == IN_ARCHIVE_RECOVERY && !XLogArchivingAlways()))
 		return true;
 
 	/* First check for .done --- this means archiver is done with it */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 9ec7b31cce..d8b08d6d17 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -165,6 +165,14 @@ typedef enum WalLevel
 	WAL_LEVEL_LOGICAL
 } WalLevel;
 
+/* Recovery state kept in shared memory; see GetRecoveryState() */
+typedef enum RecoveryState
+{
+	NOT_IN_RECOVERY = 0,		/* RecoveryInProgress() is false */
+	IN_CRASH_RECOVERY,			/* set with DB_IN_CRASH_RECOVERY */
+	IN_ARCHIVE_RECOVERY			/* also the value set at shmem init */
+} RecoveryState;
+
 extern PGDLLIMPORT int wal_level;
 
 /* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -278,6 +286,7 @@ extern const char *xlog_identify(uint8 info);
 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
 
 extern bool RecoveryInProgress(void);
+extern RecoveryState GetRecoveryState(void);
 extern bool HotStandbyActive(void);
 extern bool HotStandbyActiveInReplay(void);
 extern bool XLogInsertAllowed(void);
-- 
2.20.1

