From 4148b0756058ad42367b9758284af1cfefbd023e Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 20 Jan 2022 19:03:03 +0000 Subject: [PATCH v2 1/1] fix up code in directory scans for logical replication --- src/backend/access/heap/rewriteheap.c | 44 ++++++++++++++++++++----- src/backend/replication/logical/reorderbuffer.c | 4 --- src/backend/replication/logical/snapbuild.c | 9 +++-- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index aa265edf60..f1df361e06 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -1222,31 +1222,49 @@ CheckPointLogicalRewriteHeap(void) uint32 hi, lo; - if (strcmp(mapping_de->d_name, ".") == 0 || - strcmp(mapping_de->d_name, "..") == 0) + /* Skip over files that cannot be ours. */ + if (strncmp(mapping_de->d_name, "map-", 4) != 0) continue; snprintf(path, sizeof(path), "pg_logical/mappings/%s", mapping_de->d_name); - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) - continue; - - /* Skip over files that cannot be ours. */ - if (strncmp(mapping_de->d_name, "map-", 4) != 0) + if (lstat(path, &statbuf) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", path))); + else if (!S_ISREG(statbuf.st_mode)) continue; + /* + * We just log a message if a file doesn't fit the pattern, it's + * probably some editor's lock/state file or similar... + */ if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) - elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); + { + ereport(LOG, + (errmsg("could not parse file name \"%s\"", + mapping_de->d_name))); + continue; + } lsn = ((uint64) hi) << 32 | lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { elog(DEBUG1, "removing logical rewrite file \"%s\"", path); + + /* + * It's not particularly harmful, though strange, if we can't + * remove the file here. Don't prevent the checkpoint from + * completing, that'd be a cure worse than the disease. + */ if (unlink(path) < 0) - ereport(ERROR, + { + ereport(LOG, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", path))); + continue; + } } else { @@ -1267,6 +1285,11 @@ CheckPointLogicalRewriteHeap(void) * We could try to avoid fsyncing files that either haven't * changed or have only been created since the checkpoint's start, * but it's currently not deemed worth the effort. + * + * Unlike failures to unlink() old logical rewrite files, we must + * ERROR if we're unable to sync transient state down to disk. This + * allows replay to assume that everything written out before + * checkpoint start is persisted. */ pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC); if (pg_fsync(fd) != 0) @@ -1282,4 +1305,7 @@ CheckPointLogicalRewriteHeap(void) } } FreeDir(mappings_dir); + + /* persist directory entries to disk */ + fsync_fname("pg_logical/mappings", true); } diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index d317a626e1..6fb4b35d89 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -5028,10 +5028,6 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) f_lo; RewriteMappingFile *f; - if (strcmp(mapping_de->d_name, ".") == 0 || - strcmp(mapping_de->d_name, "..") == 0) - continue; - /* Ignore files that aren't ours */ if (strncmp(mapping_de->d_name, "map-", 4) != 0) continue; diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index ab04e946bc..f9e26e96c7 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1954,8 +1954,11 @@ CheckPointSnapBuild(void) continue; snprintf(path, sizeof(path), "pg_logical/snapshots/%s", snap_de->d_name); - - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + if (lstat(path, &statbuf) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", path))); + else if (!S_ISREG(statbuf.st_mode)) { elog(DEBUG1, "only regular files expected: %s", path); continue; @@ -1968,7 +1971,7 @@ CheckPointSnapBuild(void) * currently being written that are older than cutoff. * * We just log a message if a file doesn't fit the pattern, it's - * probably some editors lock/state file or similar... + * probably some editor's lock/state file or similar... */ if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2) { -- 2.16.6