merge file_exists_in_directory and _fileExistsInDirectory functions and move into common file dumputils.c
Hi,
We have file_exists_in_directory function in pg_restore.c and same
code we are using in _fileExistsInDirectory function in pg_backup_archiver.c
also.
Here, I am attaching a patch to move these duplicate functions into
dumputils.c file
--
Thanks and Regards
Mahendra Singh Thalor
EnterpriseDB: http://www.enterprisedb.com
Attachments:
v01-move-duplicate-code-of-file_exists_in_directory-and-.patchapplication/octet-stream; name=v01-move-duplicate-code-of-file_exists_in_directory-and-.patchDownload
From 4348270bd811048581d901012930befdf888cbb2 Mon Sep 17 00:00:00 2001
From: Mahendra Singh Thalor <mahi6run@gmail.com>
Date: Tue, 8 Apr 2025 15:57:15 +0530
Subject: [PATCH] move duplicate code of file_exists_in_directory and
_fileExistsInDirectory in dumputils.c file
we have file_exists_in_directory function in pg_restore.c and same
code we are using in _fileExistsInDirectory function in pg_backup_archiver.c
also.
Move these common function into dumputils.c file.
---
src/bin/pg_dump/dumputils.c | 17 +++++++++++++++++
src/bin/pg_dump/dumputils.h | 1 +
src/bin/pg_dump/pg_backup_archiver.c | 20 ++++----------------
src/bin/pg_dump/pg_restore.c | 19 +------------------
4 files changed, 23 insertions(+), 34 deletions(-)
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c
index 1d8ffe363e7..19a02c9ff59 100644
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -920,3 +920,20 @@ create_or_open_dir(const char *dirname)
pg_fatal("directory \"%s\" is not empty", dirname);
}
}
+
+/*
+ * file_exists_in_directory
+ *
+ * Returns true if the file exists in the given directory.
+ */
+bool
+file_exists_in_directory(const char *dir, const char *filename)
+{
+ struct stat st;
+ char buf[MAXPGPATH];
+
+ if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH)
+ pg_fatal("directory name too long: \"%s\"", dir);
+
+ return (stat(buf, &st) == 0 && S_ISREG(st.st_mode));
+}
diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h
index 91c6e612e28..7da049403fd 100644
--- a/src/bin/pg_dump/dumputils.h
+++ b/src/bin/pg_dump/dumputils.h
@@ -63,5 +63,6 @@ extern void makeAlterConfigCommand(PGconn *conn, const char *configitem,
const char *type2, const char *name2,
PQExpBuffer buf);
extern void create_or_open_dir(const char *dirname);
+extern bool file_exists_in_directory(const char *dir, const char *filename);
#endif /* DUMPUTILS_H */
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index f961162f365..51ff060448a 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2212,18 +2212,6 @@ ReadStr(ArchiveHandle *AH)
return buf;
}
-static bool
-_fileExistsInDirectory(const char *dir, const char *filename)
-{
- struct stat st;
- char buf[MAXPGPATH];
-
- if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH)
- pg_fatal("directory name too long: \"%s\"", dir);
-
- return (stat(buf, &st) == 0 && S_ISREG(st.st_mode));
-}
-
static int
_discoverArchiveFormat(ArchiveHandle *AH)
{
@@ -2255,18 +2243,18 @@ _discoverArchiveFormat(ArchiveHandle *AH)
if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode))
{
AH->format = archDirectory;
- if (_fileExistsInDirectory(AH->fSpec, "toc.dat"))
+ if (file_exists_in_directory(AH->fSpec, "toc.dat"))
return AH->format;
#ifdef HAVE_LIBZ
- if (_fileExistsInDirectory(AH->fSpec, "toc.dat.gz"))
+ if (file_exists_in_directory(AH->fSpec, "toc.dat.gz"))
return AH->format;
#endif
#ifdef USE_LZ4
- if (_fileExistsInDirectory(AH->fSpec, "toc.dat.lz4"))
+ if (file_exists_in_directory(AH->fSpec, "toc.dat.lz4"))
return AH->format;
#endif
#ifdef USE_ZSTD
- if (_fileExistsInDirectory(AH->fSpec, "toc.dat.zst"))
+ if (file_exists_in_directory(AH->fSpec, "toc.dat.zst"))
return AH->format;
#endif
pg_fatal("directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)",
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index 06c28ab3149..24e18d38fdb 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -48,6 +48,7 @@
#include "common/string.h"
#include "connectdb.h"
+#include "dumputils.h"
#include "fe_utils/option_utils.h"
#include "fe_utils/string_utils.h"
#include "filter.h"
@@ -57,7 +58,6 @@
static void usage(const char *progname);
static void read_restore_filters(const char *filename, RestoreOptions *opts);
-static bool file_exists_in_directory(const char *dir, const char *filename);
static int restore_one_database(const char *inputFileSpec, RestoreOptions *opts,
int numWorkers, bool append_data, int num);
static int read_one_statement(StringInfo inBuf, FILE *pfile);
@@ -823,23 +823,6 @@ read_restore_filters(const char *filename, RestoreOptions *opts)
filter_free(&fstate);
}
-/*
- * file_exists_in_directory
- *
- * Returns true if the file exists in the given directory.
- */
-static bool
-file_exists_in_directory(const char *dir, const char *filename)
-{
- struct stat st;
- char buf[MAXPGPATH];
-
- if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH)
- pg_fatal("directory name too long: \"%s\"", dir);
-
- return (stat(buf, &st) == 0 && S_ISREG(st.st_mode));
-}
-
/*
* read_one_statement
*
--
2.39.3
On Thu, Apr 10, 2025 at 10:41:33PM +0530, Mahendra Singh Thalor wrote:
We have file_exists_in_directory function in pg_restore.c and same
code we are using in _fileExistsInDirectory function in pg_backup_archiver.c
also.
Here, I am attaching a patch to move these duplicate functions into
dumputils.c file
Indeed. I don't quite see a reason not to remove this duplication,
and both routines in pg_restore.c and the pg_dump code are the same.
dumputils.h is only used by pg_dump and pg_dumpall, and its top
comment documents exactly that, so using this location for a routine
that would be used by a pg_restore path is a bit strange to me for
something that is qualified as a "dump" routine in your patch.
Perhaps we should just use a more centralized place, like file_utils.c
so as all frontends could benefit of it?
Please make sure to add it to the next commit fest that will begin in
July, this refactoring proposal is too late to be considered for v18.
--
Michael
On 2025-Apr-11, Michael Paquier wrote:
Perhaps we should just use a more centralized place, like file_utils.c
so as all frontends could benefit of it?
I'm not sure about that. This code looks to be making too many
assumptions that aren't acceptable for a general routine, such as
complaining only that the directory name is long without the possibility
that the culprit is the file name. It's more or less okay in current
uses because they're all using harcoded short names, but that would not
hold in general. At the same time, isn't every call of this routine a
potential TOCTTOU bug? Again it's probably fine for the current code,
but I wouldn't be too sure about making this generally available as-is.
--
Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
"Oh, great altar of passive entertainment, bestow upon me thy discordant images
at such speed as to render linear thought impossible" (Calvin a la TV)
On Fri, 11 Apr 2025 at 10:21, Michael Paquier <michael@paquier.xyz> wrote:
On Thu, Apr 10, 2025 at 10:41:33PM +0530, Mahendra Singh Thalor wrote:
We have file_exists_in_directory function in pg_restore.c and same
code we are using in _fileExistsInDirectory function in
pg_backup_archiver.c
also.
Here, I am attaching a patch to move these duplicate functions into
dumputils.c fileIndeed. I don't quite see a reason not to remove this duplication,
and both routines in pg_restore.c and the pg_dump code are the same.
Thanks Michael for the feedback.
dumputils.h is only used by pg_dump and pg_dumpall, and its top
comment documents exactly that, so using this location for a routine
that would be used by a pg_restore path is a bit strange to me for
something that is qualified as a "dump" routine in your patch.Perhaps we should just use a more centralized place, like file_utils.c
so as all frontends could benefit of it?
I tried to add it into file_utils.c but I was getting many "symbols not
found errors" so I moved this common function into dumputils.h as we have
another common function in that file. (Ex; create_or_open_dir)
If we want to move this function into file_utils.c, then I can try to
rewrite the patch.
Please make sure to add it to the next commit fest that will begin in
July, this refactoring proposal is too late to be considered for v18.
--
Michael
Okay. Thank you. I will add it.
On Fri, 11 Apr 2025 at 15:08, Álvaro Herrera <alvherre@kurilemu.de> wrote:
On 2025-Apr-11, Michael Paquier wrote:
Perhaps we should just use a more centralized place, like file_utils.c
so as all frontends could benefit of it?I'm not sure about that. This code looks to be making too many
assumptions that aren't acceptable for a general routine, such as
complaining only that the directory name is long without the possibility
that the culprit is the file name. It's more or less okay in current
uses because they're all using harcoded short names, but that would not
hold in general. At the same time, isn't every call of this routine a
potential TOCTTOU bug? Again it's probably fine for the current code,
but I wouldn't be too sure about making this generally available as-is.--
Álvaro Herrera 48°01'N 7°57'E —
"Oh, great altar of passive entertainment, bestow upon me thy discordant
images
at such speed as to render linear thought impossible" (Calvin a la TV)
Thanks Álvaro for the feedback.
/*
* file_exists_in_directory
*
* Returns true if the file exists in the given directory.
*/
static bool
file_exists_in_directory(const char *dir, const char *filename)
{
struct stat st;
char buf[MAXPGPATH];if (strlen(dir) >= MAXPGPATH)
pg_fatal("directory name too long: \"%s\"", dir);if (strlen(filename) >= MAXPGPATH)
pg_fatal("file name too long: \"%s\"", filename);/* Now check path length of dir/filename */
if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH)
pg_fatal("combined name of directory:\"%s\" and file:\"%s\" is too
long", filename, dir);return (stat(buf, &st) == 0 && S_ISREG(st.st_mode));
}
I did changes as per above code and added some checks in code to give an
error for "too long" name. I started a new thread[1]/messages/by-id/CAKYtNApPmWmU9rdf__D=cA7ivL6H_UrPc=w0CHW74P2acxJJ-g@mail.gmail.com for "too long names"
check and later I will post an updated patch here to move
duplicate functions into one common file.
[1]: /messages/by-id/CAKYtNApPmWmU9rdf__D=cA7ivL6H_UrPc=w0CHW74P2acxJJ-g@mail.gmail.com
/messages/by-id/CAKYtNApPmWmU9rdf__D=cA7ivL6H_UrPc=w0CHW74P2acxJJ-g@mail.gmail.com
--
Thanks and Regards
Mahendra Singh Thalor
EnterpriseDB: http://www.enterprisedb.com