Strange error message in xlog.c

Started by Kyotaro Horiguchi over 6 years ago, 1 message
#1Kyotaro Horiguchi
horikyota.ntt@gmail.com
1 attachment(s)

Hello,

I received a trouble report in which the reporter got the following
error messages.

FATAL: XX000: requested timeline 175 is not a child of this server's history
DETAIL: Latest checkpoint is at 1A/D6000028 on timeline 172, but in
the history of the requested timeline, the server forked off from that
timeline at 1C/29074DB8.

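This message doesn't make sense: the checkpoint (1A/D6000028) precedes
the reported fork point (1C/29074DB8), so it does not explain the
failure. Perhaps timeline 172 started after 1A/D6000028 instead.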

The attached patch makes the error messages for both cases make sense.

FATAL: requested timeline 4 is not a child of this server's history
DETAIL: Latest checkpoint is at 0/3000060 on timeline 2, but in the
history of the requested timeline, the server forked off from that
timeline at 0/22000A0.

FATAL: requested timeline 4 is not a child of this server's history
DETAIL: Latest checkpoint is at 0/3000060 on timeline 2, but in the
history of the requested timeline, the server entered that timeline at
0/40000A0.

Exercising this requires intentionally corrupting the timeline
history. Do we need a regression test for that?

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

0001-Fix-error-message-for-timeline-history-mismatch.patchapplication/octet-stream; name=0001-Fix-error-message-for-timeline-history-mismatch.patchDownload
From 9dfe8e3b163fed9ce6fc0c3d59462cf7352590ce Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Mon, 17 Jun 2019 17:15:28 +0900
Subject: [PATCH] Fix error message for timeline history mismatch.

If the latest checkpoint is found to have happened before the
beginning of the target timeline during archive recovery, the error
message is nonsensical. Fix the message for that case.
---
 src/backend/access/transam/timeline.c | 51 +++++++++++++++++++++++++++++++++++
 src/backend/access/transam/xlog.c     | 19 +++++++++++--
 src/include/access/timeline.h         |  2 ++
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index cbd9b2cee1..4172ab02a2 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -581,3 +581,54 @@ tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
 					tli)));
 	return InvalidXLogRecPtr;	/* keep compiler quiet */
 }
+
+/*
+ * Returns the relationship between lsn and the LSN range of tli in history.
+ *
+ * Returns 0 if lsn is in the range of tli; *switchpoint is then left at 0.
+ *
+ * Returns a negative number if lsn is before the range. The beginning LSN of
+ * the tli is stored into *switchpoint if it is not NULL.
+ *
+ * Returns a positive number if lsn is after the range. The ending LSN of the
+ * tli is stored into *switchpoint if it is not NULL. Errors if tli is absent.
+ */
+int
+compareLSNtoTLI(XLogRecPtr lsn, TimeLineID tli, List *history,
+				XLogRecPtr *switchpoint)
+{
+	ListCell   *cell;
+
+	if (switchpoint)
+		*switchpoint = 0;
+
+	foreach(cell, history)
+	{
+		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
+
+		if (tle->tli != tli)
+			continue;
+
+		if (lsn < tle->begin)
+		{
+			if (switchpoint)
+				*switchpoint = tle->begin;
+			return -1;
+		}
+
+		/* tle->end is exclusive */
+		if (lsn >= tle->end)
+		{
+			if (switchpoint)
+				*switchpoint = tle->end;
+			return 1;
+		}
+		return 0;
+	}
+
+	ereport(ERROR,
+			(errmsg("requested timeline %u is not in this server's history",
+					tli)));
+	return 0;	/* keep compiler quiet */
+}
+
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e08320e829..c61c1d6587 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6554,12 +6554,27 @@ StartupXLOG(void)
 		checkPoint.ThisTimeLineID)
 	{
 		XLogRecPtr	switchpoint;
+		int			comp;
 
 		/*
-		 * tliSwitchPoint will throw an error if the checkpoint's timeline is
+		 * compareLSNtoTLI will throw an error if the checkpoint's timeline is
 		 * not in expectedTLEs at all.
 		 */
-		switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
+		comp = compareLSNtoTLI(checkPointLoc, checkPoint.ThisTimeLineID,
+							   expectedTLEs, &switchpoint);
+		Assert(comp != 0);
+
+
+		if (comp < 0)
+			ereport(FATAL,
+					(errmsg("requested timeline %u is not a child of this server's history",
+							recoveryTargetTLI),
+					 errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server entered that timeline at %X/%X.",
+						   (uint32) (ControlFile->checkPoint >> 32),
+						   (uint32) ControlFile->checkPoint,
+						   ControlFile->checkPointCopy.ThisTimeLineID,
+						   (uint32) (switchpoint >> 32),
+						   (uint32) switchpoint)));
 		ereport(FATAL,
 				(errmsg("requested timeline %u is not a child of this server's history",
 						recoveryTargetTLI),
diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h
index a6dc2edb89..52fcc17851 100644
--- a/src/include/access/timeline.h
+++ b/src/include/access/timeline.h
@@ -40,5 +40,7 @@ extern bool tliInHistory(TimeLineID tli, List *expectedTLIs);
 extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history);
 extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history,
 								 TimeLineID *nextTLI);
+extern int compareLSNtoTLI(XLogRecPtr lsn, TimeLineID tli,
+						   List *history, XLogRecPtr *switchpoint);
 
 #endif							/* TIMELINE_H */
-- 
2.16.3