WAL reader APIs and WAL segment open/close callbacks
Hi all,
I have been playing with the new APIs of xlogreader.h, and while
merging some of my stuff with 13, I found the handling around
->seg.ws_file overcomplicated and confusing as it is necessary for a
plugin to manipulate directly the fd of an opened segment in the WAL
segment open/close callbacks.
Wouldn't it be cleaner to limit the exposition of ->seg.ws_file to the
user if possible? There are cases like a WAL sender where you cannot
do that, but something that came to my mind is to make
WALSegmentOpenCB return the fd of the opened segment, and pass down the
fd to close to WALSegmentCloseCB. Then xlogreader.c is in charge of
resetting the field when a segment is closed.
Any thoughts?
--
Michael
Attachments:
xlogreader-fds-v1.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index c21b0ba972..fcf57b32eb 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -63,10 +63,10 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
int reqLen,
XLogRecPtr targetRecPtr,
char *readBuf);
-typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
+typedef int (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
XLogSegNo nextSegNo,
TimeLineID *tli_p);
-typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
+typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader, int fd);
typedef struct XLogReaderRoutine
{
@@ -93,10 +93,10 @@ typedef struct XLogReaderRoutine
XLogPageReadCB page_read;
/*
- * Callback to open the specified WAL segment for reading. ->seg.ws_file
- * shall be set to the file descriptor of the opened segment. In case of
- * failure, an error shall be raised by the callback and it shall not
- * return.
+ * Callback to open the specified WAL segment for reading. Needs to
+ * return a file descriptor that will be saved as ->seg.ws_file,
+ * indicating the opened WAL segment. In case of failure, an error
+ * shall be raised by the callback and it shall not return.
*
* "nextSegNo" is the number of the segment to be opened.
*
@@ -107,8 +107,8 @@ typedef struct XLogReaderRoutine
WALSegmentOpenCB segment_open;
/*
- * WAL segment close callback. ->seg.ws_file shall be set to a negative
- * number.
+ * WAL segment close callback. "fd" is the file descriptor of the
+ * segment to close.
*/
WALSegmentCloseCB segment_close;
} XLogReaderRoutine;
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index e59b6cf3a9..c9cbdaffae 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -50,10 +50,10 @@ extern void FreeFakeRelcacheEntry(Relation fakerel);
extern int read_local_xlog_page(XLogReaderState *state,
XLogRecPtr targetPagePtr, int reqLen,
XLogRecPtr targetRecPtr, char *cur_page);
-extern void wal_segment_open(XLogReaderState *state,
+extern int wal_segment_open(XLogReaderState *state,
XLogSegNo nextSegNo,
TimeLineID *tli_p);
-extern void wal_segment_close(XLogReaderState *state);
+extern void wal_segment_close(XLogReaderState *state, int fd);
extern void XLogReadDetermineTimeline(XLogReaderState *state,
XLogRecPtr wantPage, uint32 wantLength);
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 5995798b58..b026be4cbe 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -139,7 +139,10 @@ XLogReaderFree(XLogReaderState *state)
int block_id;
if (state->seg.ws_file != -1)
- state->routine.segment_close(state);
+ {
+ state->routine.segment_close(state, state->seg.ws_file);
+ state->seg.ws_file = -1;
+ }
for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++)
{
@@ -1089,10 +1092,13 @@ WALRead(XLogReaderState *state,
XLogSegNo nextSegNo;
if (state->seg.ws_file >= 0)
- state->routine.segment_close(state);
+ {
+ state->routine.segment_close(state, state->seg.ws_file);
+ state->seg.ws_file = -1;
+ }
XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
- state->routine.segment_open(state, nextSegNo, &tli);
+ state->seg.ws_file = state->routine.segment_open(state, nextSegNo, &tli);
/* This shouldn't happen -- indicates a bug in segment_open */
Assert(state->seg.ws_file >= 0);
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 322b0e8ff5..6d3d8f6a53 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -784,17 +784,18 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
}
/* XLogReaderRoutine->segment_open callback for local pg_wal files */
-void
+int
wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p)
{
TimeLineID tli = *tli_p;
char path[MAXPGPATH];
+ int fd = -1;
XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
- state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
- if (state->seg.ws_file >= 0)
- return;
+ fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+ if (fd >= 0)
+ return fd;
if (errno == ENOENT)
ereport(ERROR,
@@ -806,15 +807,16 @@ wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
path)));
+
+ return -1; /* keep compiler quiet */
}
/* stock XLogReaderRoutine->segment_close callback */
void
-wal_segment_close(XLogReaderState *state)
+wal_segment_close(XLogReaderState *state, int fd)
{
- close(state->seg.ws_file);
+ close(fd);
/* need to check errno? */
- state->seg.ws_file = -1;
}
/*
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 86847cbb54..c01eb2d2b6 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -254,7 +254,7 @@ static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);
static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);
static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
-static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
+static int WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p);
static void UpdateSpillStats(LogicalDecodingContext *ctx);
@@ -316,7 +316,7 @@ WalSndErrorCleanup(void)
pgstat_report_wait_end();
if (xlogreader != NULL && xlogreader->seg.ws_file >= 0)
- wal_segment_close(xlogreader);
+ wal_segment_close(xlogreader, xlogreader->seg.ws_file);
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
@@ -2456,11 +2456,12 @@ WalSndKill(int code, Datum arg)
}
/* XLogReaderRoutine->segment_open callback */
-static void
+static int
WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p)
{
char path[MAXPGPATH];
+ int fd = -1;
/*-------
* When reading from a historic timeline, and there is a timeline switch
@@ -2497,9 +2498,9 @@ WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
}
XLogFilePath(path, *tli_p, nextSegNo, state->segcxt.ws_segsize);
- state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
- if (state->seg.ws_file >= 0)
- return;
+ fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+ if (fd >= 0)
+ return fd;
/*
* If the file is not found, assume it's because the standby asked for a
@@ -2522,6 +2523,8 @@ WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
path)));
+
+ return -1; /* keep compiler quiet */
}
/*
@@ -2686,7 +2689,7 @@ XLogSendPhysical(void)
{
/* close the current file. */
if (xlogreader->seg.ws_file >= 0)
- wal_segment_close(xlogreader);
+ wal_segment_close(xlogreader, xlogreader->seg.ws_file);
/* Send CopyDone */
pq_putmessage_noblock('c', NULL, 0);
@@ -2791,7 +2794,7 @@ retry:
if (reload && xlogreader->seg.ws_file >= 0)
{
- wal_segment_close(xlogreader);
+ wal_segment_close(xlogreader, xlogreader->seg.ws_file);
goto retry;
}
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index d1a0678935..6410a869f3 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -280,13 +280,14 @@ identify_target_directory(char *directory, char *fname)
}
/* pg_waldump's XLogReaderRoutine->segment_open callback */
-static void
+static int
WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p)
{
TimeLineID tli = *tli_p;
char fname[MAXPGPATH];
int tries;
+ int fd = -1;
XLogFileName(fname, tli, nextSegNo, state->segcxt.ws_segsize);
@@ -298,9 +299,9 @@ WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo,
*/
for (tries = 0; tries < 10; tries++)
{
- state->seg.ws_file = open_file_in_directory(state->segcxt.ws_dir, fname);
- if (state->seg.ws_file >= 0)
- return;
+ fd = open_file_in_directory(state->segcxt.ws_dir, fname);
+ if (fd >= 0)
+ return fd;
if (errno == ENOENT)
{
int save_errno = errno;
@@ -316,6 +317,7 @@ WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo,
}
fatal_error("could not find file \"%s\": %m", fname);
+ return -1; /* keep compiler quiet */
}
/*
@@ -323,11 +325,10 @@ WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo,
* wal_segment_close
*/
static void
-WALDumpCloseSegment(XLogReaderState *state)
+WALDumpCloseSegment(XLogReaderState *state, int fd)
{
- close(state->seg.ws_file);
+ close(fd);
/* need to check errno? */
- state->seg.ws_file = -1;
}
/* pg_waldump's XLogReaderRoutine->page_read callback */
At Mon, 25 May 2020 07:44:09 +0900, Michael Paquier <michael@paquier.xyz> wrote in
Hi all,
I have been playing with the new APIs of xlogreader.h, and while
merging some of my stuff with 13, I found the handling around
->seg.ws_file overcomplicated and confusing as it is necessary for a
plugin to manipulate directly the fd of an opened segment in the WAL
segment open/close callbacks.
That depends on where we draw responsibility border, or who is
responsible to the value of ws_file. I think that this API change was
assuming the callbacks having full-knowledge of the xlogreader struct
and are responsible to maintain related struct members, and I agree to
that direction.
Wouldn't it be cleaner to limit the exposition of ->seg.ws_file to the
user if possible? There are cases like a WAL sender where you cannot
do that, but something that came to my mind is to make
WALSegmentOpenCB return the fd of the opened segment, and pass down the
fd to close to WALSegmentCloseCB. Then xlogreader.c is in charge of
resetting the field when a segment is closed.Any thoughts?
If we are going to hide the struct from the callbacks, we shouldn't
pass to the callbacks a pointer to the complete XLogReaderState
struct.
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
On Mon, May 25, 2020 at 11:17:06AM +0900, Kyotaro Horiguchi wrote:
That depends on where we draw responsibility border, or who is
responsible to the value of ws_file. I think that this API change was
assuming the callbacks having full-knowledge of the xlogreader struct
and are responsible to maintain related struct members, and I agree to
that direction.
Sure. Still I am skeptical that the interface of HEAD is the most
instinctive choice as designed. We assume that plugins using
xlogreader.c have to set the flag all the way down for something which
is mostly an internal state. WAL senders need to be able to use the
fd directly to close the segment in some code paths, but the only
thing where we give, and IMO, should give access to the information of
WALOpenSegment is for the error path after a failed WALRead(). And it
seems more natural to me to return the opened fd to xlogreader.c, that
is then the part in charge of assigning the fd to the correct part of
XLogReaderState. This reminds me a bit of what we did for
libpqrcv_receive() a few years ago where we manipulate directly a fd
to wait on instead of setting it directly in some internal structure.
If we are going to hide the struct from the callbacks, we shouldn't
pass to the callbacks a pointer to the complete XLogReaderState
struct.
It still seems to me that it is helpful to pass down the whole thing
to the close and open callbacks, for at least debugging purposes. I
found that helpful when debugging my tool through my rebase with
v13.
As a side note, it was actually tricky to find out that you have to
call WALRead() to force the opening of a new segment when calling
XLogFindNextRecord() in the block read callback after WAL reader
allocation. Perhaps we should call segment_open() at the beginning of
XLogFindNextRecord() if no segment is opened yet? I would bet that
not everything is aimed at using WALRead() even if that's a useful
wrapper, and as shaped the block-read callbacks are mostly useful to
give the callers the ability to adjust to a maximum record length that
can be read, which looks limited (?).
--
Michael
At Mon, 25 May 2020 14:19:34 +0900, Michael Paquier <michael@paquier.xyz> wrote in
On Mon, May 25, 2020 at 11:17:06AM +0900, Kyotaro Horiguchi wrote:
That depends on where we draw responsibility border, or who is
responsible to the value of ws_file. I think that this API change was
assuming the callbacks having full-knowledge of the xlogreader struct
and are responsible to maintain related struct members, and I agree to
that direction.Sure. Still I am skeptical that the interface of HEAD is the most
instinctive choice as designed. We assume that plugins using
xlogreader.c have to set the flag all the way down for something which
is mostly an internal state. WAL senders need to be able to use the
fd directly to close the segment in some code paths, but the only
thing where we give, and IMO, should give access to the information of
WALOpenSegment is for the error path after a failed WALRead(). And it
seems more natural to me to return the opened fd to xlogreader.c, that
is then the part in charge of assigning the fd to the correct part of
XLogReaderState. This reminds me a bit of what we did for
libpqrcv_receive() a few years ago where we manipulate directly a fd
to wait on instead of setting it directly in some internal structure.
I agree that it's generally natural that open callback returns an fd
and close takes an fd. However, for the xlogreader case, the
xlogreader itself doesn't know much about files. WALRead is the
exception as a convenient routine for reading WAL files in a generic
way, which can be thought as belonging to the caller side. The
callbacks (that is, the caller side of xlogreader) are in charge of
opening, reading and closing segment files. That is the same for the
case of XLogPageRead, which is the caller of xlogreader and is
directly manipulating ws_file and ws_tli.
Further, I think that xlogreader shouldn't know about file handling at
all. The reason that xlogreaderstate has fd and tli is that it is
needed by file-handling callbacks, which belongs to the caller of
xlogreader.
If the call structure were upside-down, that is, callers handled files
privately then call xlogreader to retrieve records from the page data,
things would be simpler in the caller's view. That is the patch I'm
proposing as a xlog reader refactoring [1]/messages/by-id/20200422.101246.331162888498679491.horikyota.ntt@gmail.com.
If we are going to hide the struct from the callbacks, we shouldn't
pass to the callbacks a pointer to the complete XLogReaderState
struct.It still seems to me that it is helpful to pass down the whole thing
to the close and open callbacks, for at least debugging purposes. I
found that helpful when debugging my tool through my rebase with
v13.
Why do you not looking into upper stack-frames?
As a side note, it was actually tricky to find out that you have to
call WALRead() to force the opening of a new segment when calling
XLogFindNextRecord() in the block read callback after WAL reader
allocation. Perhaps we should call segment_open() at the beginning of
XLogFindNextRecord() if no segment is opened yet? I would bet that
The API change was mere a refactoring and didn't change the whole
logic largely.
The segment open callback is not a part of xlogreader but only a part
of the WALRead function. As I mentioned above, file handling is the
matter of the caller side (and WALRead, which is a part of
caller-side). ReadPageInternal doesn't know about underlying files at
all.
not everything is aimed at using WALRead() even if that's a useful
wrapper, and as shaped the block-read callbacks are mostly useful to
give the callers the ability to adjust to a maximum record length that
can be read, which looks limited (?).
I'm not sure. The reason for the fact that the page-read callbacks
that don't use WALRead doesn't need open callback is it's not actually
belongs to xlogreader, I think.
[1]: /messages/by-id/20200422.101246.331162888498679491.horikyota.ntt@gmail.com
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
On 2020-May-25, Michael Paquier wrote:
I have been playing with the new APIs of xlogreader.h, and while
merging some of my stuff with 13, I found the handling around
->seg.ws_file overcomplicated and confusing as it is necessary for a
plugin to manipulate directly the fd of an opened segment in the WAL
segment open/close callbacks.Wouldn't it be cleaner to limit the exposition of ->seg.ws_file to the
user if possible? There are cases like a WAL sender where you cannot
do that, but something that came to my mind is to make
WALSegmentOpenCB return the fd of the opened segment, and pass down the
fd to close to WALSegmentCloseCB. Then xlogreader.c is in charge of
resetting the field when a segment is closed.
The original code did things as you suggest: the open_segment callback
returned the FD, and the caller installed it in the struct. We then
changed it in commit 850196b610d2 to have the CB install the FD in the
struct directly. I didn't like this idea when I first saw it -- my
reaction was pretty much the same as yours -- but eventually I settled
on it because if we want xlogreader to be in charge of installing the
FD, then we should also make it responsible for reacting properly when a
bad FD is returned, and report errors correctly.
(In the previous coding, xlogreader didn't tolerate bad FDs; it just
blindly installed a bad FD if one was returned. Luckily, existing CBs
never returned any bad FDs so there's no bug, but it seems hazardous API
design.)
In my ideal world, the open_segment CB would just open and return a
valid FD, or return an error message if unable to; if WALRead sees that
the returned FD is not valid, it installs the error message in *errinfo
so its caller can report it. I'm not opposed to doing things that way,
but it seemed more complexity to me than what we have now.
Now maybe you wish for a middle ground: the CB returns the FD, or fails
trying. Is that what you want? I didn't like that, as it seems
unprincipled. I'd rather keep things as they're now.
--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
On Mon, May 25, 2020 at 04:30:34PM -0400, Alvaro Herrera wrote:
The original code did things as you suggest: the open_segment callback
returned the FD, and the caller installed it in the struct. We then
changed it in commit 850196b610d2 to have the CB install the FD in the
struct directly. I didn't like this idea when I first saw it -- my
reaction was pretty much the same as yours -- but eventually I settled
on it because if we want xlogreader to be in charge of installing the
FD, then we should also make it responsible for reacting properly when a
bad FD is returned, and report errors correctly.
Installing the fd in WALOpenSegment and reporting an error are not
related concepts though, no? segment_open could still report errors
and return the fd, where then xlogreader.c saves the returned fd in
ws_file.
(In the previous coding, xlogreader didn't tolerate bad FDs; it just
blindly installed a bad FD if one was returned. Luckily, existing CBs
never returned any bad FDs so there's no bug, but it seems hazardous API
design.)
I think that the assertions making sure that bad fds are not passed
down by segment_open are fine to live with.
In my ideal world, the open_segment CB would just open and return a
valid FD, or return an error message if unable to; if WALRead sees that
the returned FD is not valid, it installs the error message in *errinfo
so its caller can report it. I'm not opposed to doing things that way,
but it seemed more complexity to me than what we have now.
Hm. We require now that segment_open fails immediately if it cannot
have a correct fd, so it does not return an error message, it just
gives up. I am indeed not sure that moving around more WALReadError
is that interesting for that purpose. It could be interesting to
allow plugins to have a way to retry opening a new segment though
instead of giving up? But we don't really need that much now in
core.
Now maybe you wish for a middle ground: the CB returns the FD, or fails
trying. Is that what you want? I didn't like that, as it seems
unprincipled. I'd rather keep things as they're now.
Yeah, I think that the patch I sent previously is attempting at doing
things in a middle ground, which felt more natural to me while merging
my own stuff: do not fill directly ws_file within segment_open, and
let xlogreader.c save the returned fd, with segment_open giving up
immediately if we cannot get one. If you wish to keep things as they
are now that's fine by me :)
NB: I found some incorrect comments as per the attached:
s/open_segment/segment_open/
s/close_segment/segment_close/
--
Michael
Attachments:
xlogreader-comment.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index c21b0ba972..d930fe957d 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -21,7 +21,7 @@
* XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
* otherwise. The WALRead function can be used as a helper to write
* page_read callbacks, but it is not mandatory; callers that use it,
- * must supply open_segment callbacks. The close_segment callback
+ * must supply segment_open callbacks. The segment_close callback
* must always be supplied.
*
* After reading a record with XLogReadRecord(), it's decomposed into