From f2d79b22c8613bc5b124319703216f24348769e5 Mon Sep 17 00:00:00 2001
From: Thomas Munro
Date: Sat, 30 Nov 2019 10:18:16 +1300
Subject: [PATCH] Don't use _mdfd_getseg() in mdsyncfiletag().

_mdfd_getseg() opens all segments up to the requested one. That
causes problems for mdsyncfiletag(), if mdunlinkfork() has already
unlinked other segment files. Open the file we want directly by name
instead.

The consequence of this bug was a rare panic in the checkpointer, made
more likely if you saturated the sync request queue so that the
SYNC_FORGET_REQUEST messages for a given relation were more likely to
be absorbed in separate cycles by the checkpointer.

Back-patch to 12. Defect in commit 3eb77eba.

Author: Thomas Munro
Reported-by: Justin Pryzby
Discussion: https://postgr.es/m/20191119115759.GI30362%40telsasoft.com
---
 src/backend/storage/smgr/md.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 8a9eaf6430..0f407c3fa3 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1280,25 +1280,37 @@ int
 mdsyncfiletag(const FileTag *ftag, char *path)
 {
 	SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId);
-	MdfdVec    *v;
 	char	   *p;
+	int			fd,
+				result,
+				save_errno;
 
 	/* Provide the path for informational messages. */
 	p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
 	strlcpy(path, p, MAXPGPATH);
 	pfree(p);
 
-	/* Try to open the requested segment. */
-	v = _mdfd_getseg(reln,
-					 ftag->forknum,
-					 ftag->segno * (BlockNumber) RELSEG_SIZE,
-					 false,
-					 EXTENSION_RETURN_NULL | EXTENSION_DONT_CHECK_SIZE);
-	if (v == NULL)
+	/*
+	 * Try to open the requested segment. Use a raw file descriptor instead
+	 * of _mdfd_getseg() and FileSync(), because _mdfd_getseg() might open
+	 * segments other than the one we're interested in.
+	 */
+	fd = OpenTransientFile(path, O_RDWR);
+	if (fd < 0)
 		return -1;
 
-	/* Try to fsync the file. */
-	return FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC);
+	/* Sync the file, being careful not to clobber fsync()'s errno. */
+	pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_SYNC);
+	result = pg_fsync(fd);
+	save_errno = errno;
+	pgstat_report_wait_end();
+	if (CloseTransientFile(fd) != 0)
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", path)));
+	errno = save_errno;
+
+	return result;
 }
 
 /*
-- 
2.23.0