From fd6eb4067397721901d992d9dc9bfe49a533cd85 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 29 Nov 2019 10:12:54 +1300 Subject: [PATCH] Fix ordering bug in mdunlinkfork(). When mdunlinkfork() searches the filesystem for segment files to unlink, it must send SYNC_FORGET_REQUEST requests for all segments before unlinking any of them. mdsyncfiletag() uses _mdfd_getset() to open segments, and that function can try to open segments other than the one the caller asked for. So, use stat() instead of unlink() to discover the range of existing files, and then "forget" them all before we begin unlinking. The consequence of this bug was a rare failure in the checkpointer, made more likely if you saturated the sync request queue so that the SYNC_FORGET_REQUEST messages for a given relation were more likely to be absorbed separately by the checkpointer. Back-patch to 12. Defect in commit 3eb77eba. Author: Thomas Munro Reported-by: Justin Pryzby Discussion: https://postgr.es/m/20191119115759.GI30362%40telsasoft.com --- src/backend/storage/smgr/md.c | 37 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 8a9eaf6430..4eac1d990f 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "access/xlog.h" #include "access/xlogutils.h" @@ -344,30 +345,46 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) { char *segpath = (char *) palloc(strlen(path) + 12); BlockNumber segno; + BlockNumber nsegments; /* - * Note that because we loop until getting ENOENT, we will correctly - * remove all inactive segments as well as active ones. + * Because mdsyncfiletag() uses _mdfd_getseg() to open segment files, + * we have to send SYNC_FORGET_REQUEST for all segments before we + * begin unlinking any of them. Since it opens segments other than + * the one you asked for, it might try to open a file that we've + * already unlinked if we don't do this first. */ for (segno = 1;; segno++) { - /* - * Forget any pending sync requests for this segment before we try - * to unlink. - */ - if (!RelFileNodeBackendIsTemp(rnode)) - register_forget_request(rnode, forkNum, segno); + struct stat statbuf; sprintf(segpath, "%s.%u", path, segno); - if (unlink(segpath) < 0) + if (stat(segpath, &statbuf) < 0) { /* ENOENT is expected after the last segment... */ if (errno != ENOENT) ereport(WARNING, (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", segpath))); + errmsg("could not stat file \"%s\" before removing: %m", + segpath))); break; } + if (!RelFileNodeBackendIsTemp(rnode)) + register_forget_request(rnode, forkNum, segno); + } + nsegments = segno; + + /* + * Now that we've canceled requests for all segments up to nsegments, + * it is safe to remove the files. + */ + for (segno = 1; segno < nsegments; segno++) + { + sprintf(segpath, "%s.%u", path, segno); + if (unlink(segpath) < 0) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", segpath))); } pfree(segpath); } -- 2.23.0