Propagate XLogFindNextRecord error to callers
Started by Anthonin Bonnefoy, about 1 month ago, 1 message
Hi,
Currently, XLogFindNextRecord's errormsg is ignored, and callers only
output a generic 'could not find a valid record' message without details.
Additionally, an invalid page header is reported without going through
XLogReadRecord, leaving the error in state->errormsg_buf.
This patch propagates XLogFindNextRecord's error message to the callers and
displays it. In case of an invalid page header, the errormsg is filled with
errormsg_buf content.
With this patch, pg_waldump will now have the following output when reading
a file with an invalid header:
pg_waldump: error: could not find a valid record after D80/5C000000:
invalid magic number D116 in WAL segment 0000001400000D8000000017, LSN
D80/5C000000, offset 0
Regards,
Anthonin Bonnefoy
Attachments:
v1-0001-Propage-errormsg-to-XLogFindNextRecord-caller.patch (application/octet-stream; name=v1-0001-Propage-errormsg-to-XLogFindNextRecord-caller.patch) [Download]
From f807ad4e9aaf5e68601f65482cfcd33eeaea5e0c Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Fri, 12 Dec 2025 09:56:00 +0100
Subject: Propagate errormsg to XLogFindNextRecord caller
Currently, XLogFindNextRecord's errormsg is ignored, and callers only
output a generic 'could not find a valid record' message without
details.
Additionally, an invalid page header is reported without going through
XLogReadRecord, leaving the error in state->errormsg_buf.
This patch propagates XLogFindNextRecord's error message to the callers
and displays it. In case of an invalid page header, the errormsg is
filled with errormsg_buf content.
---
contrib/pg_walinspect/pg_walinspect.c | 16 ++++++++++++----
src/backend/access/transam/xlogreader.c | 16 +++++++++++++---
src/backend/postmaster/walsummarizer.c | 17 ++++++++++++-----
src/bin/pg_waldump/pg_waldump.c | 12 +++++++++---
src/include/access/xlogreader.h | 2 +-
5 files changed, 47 insertions(+), 16 deletions(-)
diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c
index 6945bac1306..da997e4fd86 100644
--- a/contrib/pg_walinspect/pg_walinspect.c
+++ b/contrib/pg_walinspect/pg_walinspect.c
@@ -97,6 +97,7 @@ InitXLogReaderState(XLogRecPtr lsn)
XLogReaderState *xlogreader;
ReadLocalXLogPageNoWaitPrivate *private_data;
XLogRecPtr first_valid_record;
+ char *errormsg = NULL; /* XLogFindNextRecord may leave it unset */
/*
* Reading WAL below the first page of the first segments isn't allowed.
@@ -124,12 +125,19 @@ InitXLogReaderState(XLogRecPtr lsn)
errdetail("Failed while allocating a WAL reading processor.")));
/* first find a valid recptr to start from */
- first_valid_record = XLogFindNextRecord(xlogreader, lsn);
+ first_valid_record = XLogFindNextRecord(xlogreader, lsn, &errormsg);
if (!XLogRecPtrIsValid(first_valid_record))
- ereport(ERROR,
- errmsg("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(lsn)));
+ {
+ if (errormsg)
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(lsn), errormsg));
+ else
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(lsn)));
+ }
return xlogreader;
}
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 5e5001b2101..269c0ff7f47 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1391,12 +1391,11 @@ XLogReaderResetError(XLogReaderState *state)
* XLogReadRecord() will read the next valid record.
*/
XLogRecPtr
-XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
+XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
{
XLogRecPtr tmpRecPtr;
XLogRecPtr found = InvalidXLogRecPtr;
XLogPageHeader header;
- char *errormsg;
Assert(XLogRecPtrIsValid(RecPtr));
@@ -1481,7 +1480,7 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
* or we just jumped over the remaining data of a continuation.
*/
XLogBeginRead(state, tmpRecPtr);
- while (XLogReadRecord(state, &errormsg) != NULL)
+ while (XLogReadRecord(state, errormsg) != NULL)
{
/* past the record we've found, break out */
if (RecPtr <= state->ReadRecPtr)
@@ -1496,6 +1495,17 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
err:
XLogReaderInvalReadState(state);
+ /*
+ * An invalid page header is reported without going through
+ * XLogReadRecord(); hand the buffered message to the caller.
+ */
+ if (state->errormsg_deferred)
+ {
+ if (state->errormsg_buf[0] != '\0')
+ *errormsg = state->errormsg_buf;
+ state->errormsg_deferred = false;
+ }
+
return InvalidXLogRecPtr;
}
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index e7e4d652f97..3e7114b6942 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -918,6 +918,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
WalSummaryIO io;
BlockRefTable *brtab = CreateEmptyBlockRefTable();
bool fast_forward = true;
+ char *errormsg = NULL; /* XLogFindNextRecord may leave it unset */
/* Initialize private data for xlogreader. */
private_data = palloc0_object(SummarizerReadLocalXLogPrivate);
@@ -969,7 +970,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
}
else
{
- summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn);
+ summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn, &errormsg);
if (!XLogRecPtrIsValid(summary_start_lsn))
{
/*
@@ -998,9 +999,16 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
switch_lsn = xlogreader->EndRecPtr;
}
else
- ereport(ERROR,
- errmsg("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(start_lsn)));
+ {
+ if (errormsg)
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(start_lsn), errormsg));
+ else
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(start_lsn)));
+ }
}
/* We shouldn't go backward. */
@@ -1013,7 +1021,6 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
while (1)
{
int block_id;
- char *errormsg;
XLogRecord *record;
uint8 rmid;
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 1c1ccf59f65..d73f44e6394 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -1211,11 +1211,17 @@ main(int argc, char **argv)
pg_fatal("out of memory while allocating a WAL reading processor");
/* first find a valid recptr to start from */
- first_record = XLogFindNextRecord(xlogreader_state, private.startptr);
+ first_record = XLogFindNextRecord(xlogreader_state, private.startptr, &errormsg);
if (!XLogRecPtrIsValid(first_record))
- pg_fatal("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(private.startptr));
+ {
+ if (errormsg)
+ pg_fatal("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(private.startptr), errormsg);
+ else
+ pg_fatal("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(private.startptr));
+ }
/*
* Display a message that we're skipping data if `from` wasn't a pointer
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index dfabbbd57d4..4f68fe0e4ea 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -342,7 +342,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
/* Position the XLogReader to given record */
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
-extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
+extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg);
/* Return values from XLogPageReadCB. */
typedef enum XLogPageReadResult
--
2.51.0