From 276f59c8b37a31cb831b7753d2b107eb1d83c1fb Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Date: Fri, 28 Feb 2020 15:52:58 +0900
Subject: [PATCH v5] Make End-Of-Recovery error less scary

When recovery in any type ends, we see a bit scary error message like
"invalid record length" that suggests something serious is
happening. Actually if recovery meets a record with length = 0, that
usually means it finished applying all available WAL records.

Make this message less scary as "reached end of WAL". Instead raise
the error level for other kind of WAL failure to WARNING.
---
 src/backend/access/transam/xlog.c       | 94 +++++++++++++++++++------
 src/backend/access/transam/xlogreader.c | 14 ++++
 src/backend/replication/walreceiver.c   |  3 +-
 src/include/access/xlogreader.h         |  1 +
 4 files changed, 87 insertions(+), 25 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 5cda30836f..623fb01d0a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4477,6 +4477,7 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 	for (;;)
 	{
 		char	   *errormsg;
+		XLogRecPtr	ErrRecPtr = InvalidXLogRecPtr;
 
 		record = XLogReadRecord(xlogreader, &errormsg);
 		ReadRecPtr = xlogreader->ReadRecPtr;
@@ -4494,6 +4495,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 			{
 				abortedRecPtr = xlogreader->abortedRecPtr;
 				missingContrecPtr = xlogreader->missingContrecPtr;
+				ErrRecPtr = abortedRecPtr;
+			}
+			else
+			{
+				/*
+				 * NULL ReadRecPtr means we could not read a record at
+				 * beginning. In that case EndRecPtr is storing the LSN of the
+				 * record we tried to read.
+				 */
+				ErrRecPtr = ReadRecPtr ? ReadRecPtr : EndRecPtr;
 			}
 
 			if (readFile >= 0)
@@ -4503,13 +4514,12 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 			}
 
 			/*
-			 * We only end up here without a message when XLogPageRead()
-			 * failed - in that case we already logged something. In
-			 * StandbyMode that only happens if we have been triggered, so we
-			 * shouldn't loop anymore in that case.
+			 * If we get here for other than end-of-wal, emit the error message
+			 * right now. Otherwise the message if any is shown as a part of
+			 * the end-of-WAL message below.
 			 */
-			if (errormsg)
-				ereport(emode_for_corrupt_record(emode, EndRecPtr),
+			if (!xlogreader->EndOfWAL && errormsg)
+				ereport(emode_for_corrupt_record(emode, ErrRecPtr),
 						(errmsg_internal("%s", errormsg) /* already translated */ ));
 		}
 
@@ -4540,11 +4550,12 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 			/* Great, got a record */
 			return record;
 		}
-		else
+
+		/* No valid record available from this source */
+		lastSourceFailed = true;
+
+		if (!fetching_ckpt)
 		{
-			/* No valid record available from this source */
-			lastSourceFailed = true;
-
 			/*
 			 * If archive recovery was requested, but we were still doing
 			 * crash recovery, switch to archive recovery and retry using the
@@ -4557,11 +4568,17 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 			 * we'd have no idea how far we'd have to replay to reach
 			 * consistency.  So err on the safe side and give up.
 			 */
-			if (!InArchiveRecovery && ArchiveRecoveryRequested &&
-				!fetching_ckpt)
+			if (!InArchiveRecovery && ArchiveRecoveryRequested)
 			{
+				/*
+				 * We don't report this as LOG, since we don't stop recovery
+				 * here
+				 */
 				ereport(DEBUG1,
-						(errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
+						(errmsg_internal("reached end of WAL at %X/%X on timeline %u in %s during crash recovery, entering archive recovery",
+										 LSN_FORMAT_ARGS(ErrRecPtr),
+										 replayTLI,
+										 xlogSourceNames[currentSource])));
 				InArchiveRecovery = true;
 				if (StandbyModeRequested)
 					StandbyMode = true;
@@ -4609,12 +4626,33 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
 				continue;
 			}
 
-			/* In standby mode, loop back to retry. Otherwise, give up. */
-			if (StandbyMode && !CheckForStandbyTrigger())
-				continue;
-			else
-				return NULL;
+			/*
+			 *  If we haven't emit an error message, we have safely reached the
+			 *  end-of-WAL.
+			 */
+			if (emode_for_corrupt_record(LOG, ErrRecPtr) == LOG)
+			{
+				char *fmt;
+
+				if (StandbyMode)
+					fmt = gettext_noop("reached end of WAL at %X/%X on timeline %u in %s during standby mode");
+				else if (InArchiveRecovery)
+					fmt = gettext_noop("reached end of WAL at %X/%X on timeline %u in %s during archive recovery");
+				else
+					fmt = gettext_noop("reached end of WAL at %X/%X on timeline %u in %s during crash recovery");
+
+				ereport(LOG,
+						(errmsg(fmt, LSN_FORMAT_ARGS(ErrRecPtr), replayTLI,
+								xlogSourceNames[currentSource]),
+						 (errormsg ? errdetail_internal("%s", errormsg) : 0)));
+			}
 		}
+
+		/* In standby mode, loop back to retry. Otherwise, give up. */
+		if (StandbyMode && !CheckForStandbyTrigger())
+			continue;
+		else
+			return NULL;
 	}
 }
 
@@ -7544,7 +7582,7 @@ StartupXLOG(void)
 		else
 		{
 			/* just have to read next record after CheckPoint */
-			record = ReadRecord(xlogreader, LOG, false, ThisTimeLineID);
+			record = ReadRecord(xlogreader, WARNING, false, ThisTimeLineID);
 		}
 
 		if (record != NULL)
@@ -7781,7 +7819,7 @@ StartupXLOG(void)
 				}
 
 				/* Else, try to fetch the next WAL record */
-				record = ReadRecord(xlogreader, LOG, false, ThisTimeLineID);
+				record = ReadRecord(xlogreader, WARNING, false, ThisTimeLineID);
 			} while (record != NULL);
 
 			/*
@@ -7841,13 +7879,20 @@ StartupXLOG(void)
 
 			InRedo = false;
 		}
-		else
+		else if (xlogreader->EndOfWAL)
 		{
 			/* there are no WAL records following the checkpoint */
 			ereport(LOG,
 					(errmsg("redo is not required")));
 
 		}
+		else
+		{
+			/* broken record found */
+			ereport(WARNING,
+					(errmsg("redo is skipped"),
+					 errhint("This suggests WAL file corruption. You might need to check the database.")));
+		}
 
 		/*
 		 * This check is intentionally after the above log messages that
@@ -13135,7 +13180,9 @@ XLogShutdownWalRcv(void)
  * reading from pg_wal, because we don't expect any invalid records in archive
  * or in records streamed from the primary. Files in the archive should be complete,
  * and we should never hit the end of WAL because we stop and wait for more WAL
- * to arrive before replaying it.
+ * to arrive before replaying it.  When we failed to read a new page,
+ * readSource is reset to XLOG_FROM_ANY. This indicates all sources including
+ * pg_wal was failed. Thus treat that the same way with XLOG_FROM_PG_WAL.
  *
  * NOTE: This function remembers the RecPtr value it was last called with,
  * to suppress repeated messages about the same record. Only call this when
@@ -13147,7 +13194,8 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
 {
 	static XLogRecPtr lastComplaint = 0;
 
-	if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
+	if ((readSource == XLOG_FROM_PG_WAL || readSource == XLOG_FROM_ANY)
+		&& emode <= WARNING)
 	{
 		if (RecPtr == lastComplaint)
 			emode = DEBUG1;
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index f39f8044a9..df2198e862 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -121,6 +121,7 @@ XLogReaderAllocate(int wal_segment_size, const char *waldir,
 		pfree(state);
 		return NULL;
 	}
+	state->EndOfWAL = false;
 	state->errormsg_buf[0] = '\0';
 
 	/*
@@ -292,6 +293,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg)
 	/* reset error state */
 	*errormsg = NULL;
 	state->errormsg_buf[0] = '\0';
+	state->EndOfWAL = false;
 
 	ResetDecoder(state);
 	state->abortedRecPtr = InvalidXLogRecPtr;
@@ -588,6 +590,9 @@ err:
 		 */
 		state->abortedRecPtr = RecPtr;
 		state->missingContrecPtr = targetPagePtr;
+		report_invalid_record(state,
+							  "missing contrecord at %X/%X",
+							  LSN_FORMAT_ARGS(RecPtr));
 	}
 
 	/*
@@ -730,6 +735,15 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 					  XLogRecPtr PrevRecPtr, XLogRecord *record,
 					  bool randAccess)
 {
+	if (record->xl_tot_len == 0)
+	{
+		/* This is strictly not an invalid state, so phrase it as so. */
+		report_invalid_record(state,
+							  "record length is 0 at %X/%X",
+							  LSN_FORMAT_ARGS(RecPtr));
+		state->EndOfWAL = true;
+		return false;
+	}
 	if (record->xl_tot_len < SizeOfXLogRecord)
 	{
 		report_invalid_record(state,
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 7a7eb3784e..ba3c4bd550 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -471,8 +471,7 @@ WalReceiverMain(void)
 						else if (len < 0)
 						{
 							ereport(LOG,
-									(errmsg("replication terminated by primary server"),
-									 errdetail("End of WAL reached on timeline %u at %X/%X.",
+									(errmsg("replication terminated by primary server on timeline %u at %X/%X.",
 											   startpointTLI,
 											   LSN_FORMAT_ARGS(LogstreamResult.Write))));
 							endofwal = true;
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index de6fd791fe..1241b85838 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -174,6 +174,7 @@ struct XLogReaderState
 	 */
 	XLogRecPtr	ReadRecPtr;		/* start of last record read */
 	XLogRecPtr	EndRecPtr;		/* end+1 of last record read */
+	bool		EndOfWAL;		/* the last attempt was EOW? */
 
 	/*
 	 * Set at the end of recovery: the start point of a partial record at the
-- 
2.27.0

