should frontend tools use syncfs() ?

Started by Justin Pryzby over 4 years ago · 50 messages
#1Justin Pryzby
pryzby@telsasoft.com
1 attachment(s)

Forking this thread in which Thomas implemented syncfs for the startup process
(61752afb2).
/messages/by-id/CA+hUKG+SG9jSW3ekwib0cSdC0yD-jReJ21X4bZAmqxoWTLTc2A@mail.gmail.com

Is there any reason that initdb/pg_basebackup/pg_checksums/pg_rewind shouldn't
use syncfs() ?

do_syncfs() is in src/backend/ so would need to be duplicated^Wimplemented in
common.

They can't use the GUC, so need to add a cmdline option or look at an
environment variable.

Attachments:

0001-Allow-use-of-syncfs-in-frontend-tools.patch (text/x-diff; charset=us-ascii) Download
From 747ec6011d438a655ee26105a315119b75036c10 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Wed, 29 Sep 2021 14:22:32 -0500
Subject: [PATCH] Allow use of syncfs() in frontend tools

Like initdb/basebackup/checksums/rewind.

See also: 61752afb26404dfc99a535c7a53f7f04dc110263
---
 src/backend/utils/misc/guc.c          |  1 +
 src/bin/initdb/initdb.c               | 10 +++-
 src/bin/pg_basebackup/pg_basebackup.c |  2 +-
 src/bin/pg_checksums/pg_checksums.c   |  2 +-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/common/file_utils.c               | 72 ++++++++++++++++++++++++++-
 src/include/common/file_utils.h       |  2 +-
 7 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index d2ce4a8450..e37f6941f4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -50,6 +50,7 @@
 #include "commands/user.h"
 #include "commands/vacuum.h"
 #include "commands/variable.h"
+#include "common/file_utils.h"
 #include "common/string.h"
 #include "funcapi.h"
 #include "jit/jit.h"
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 1ed4808d53..79048f3949 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -142,6 +142,7 @@ static bool noclean = false;
 static bool noinstructions = false;
 static bool do_sync = true;
 static bool sync_only = false;
+static bool use_syncfs = false;
 static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
@@ -2200,6 +2201,7 @@ usage(const char *progname)
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
+	printf(_("    , --syncfs              use syncfs() rather than recursive fsync()\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
 	printf(_("  -?, --help                show this help, then exit\n"));
@@ -2869,6 +2871,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"no-instructions", no_argument, NULL, 13},
 		{"sync-only", no_argument, NULL, 'S'},
+		{"syncfs", no_argument, NULL, 15},
 		{"waldir", required_argument, NULL, 'X'},
 		{"wal-segsize", required_argument, NULL, 12},
 		{"data-checksums", no_argument, NULL, 'k'},
@@ -3020,6 +3023,9 @@ main(int argc, char *argv[])
 										 extra_options,
 										 "-c debug_discard_caches=1");
 				break;
+			case 15:
+				use_syncfs = true;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
@@ -3064,7 +3070,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, use_syncfs);
 		check_ok();
 		return 0;
 	}
@@ -3153,7 +3159,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, use_syncfs);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 669aa207a3..0b59b1fe32 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -2192,7 +2192,7 @@ BaseBackup(void)
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, false);
 		}
 	}
 
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index e833c5d75e..021a4b18f9 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -687,7 +687,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, false);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index c50f283ede..c50d5945ab 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, false);
 }
 
 
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 40b73bbe1a..c8b62e8aed 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -50,8 +50,33 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+/* This is a frontend version of what's in src/backend/storage/file/fd.c */
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+	if (fd < 0)
+	{
+		pg_log_error("could not open directory \"%s\": %m", path);
+		return;
+	}
+	if (syncfs(fd) < 0)
+	{
+		pg_log_fatal("could not synchronize file system for file \"%s\": %m", path);
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+#endif
+
 /*
- * Issue fsync recursively on PGDATA and all its contents.
+ * Issue fsync recursively on PGDATA and all its contents, or issue syncfs for
+ * all potential filesystem, depending on recovery_init_sync_method setting.
+ *
+ * This is a frontend version of SyncDataDirectory.
  *
  * We fsync regular files and directories wherever they are, but we follow
  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
@@ -62,7 +87,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 bool use_syncfs)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -93,6 +119,48 @@ fsync_pgdata(const char *pg_data,
 		xlog_is_symlink = true;
 #endif
 
+#ifdef HAVE_SYNCFS
+	if (use_syncfs)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+		{
+			/* It's not a fatal error if pg_tblspc itself doesn't exist */
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		} else {
+			while ((de = readdir(dir)))
+			{
+				char		path[MAXPGPATH];
+
+				if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(path, MAXPGPATH, "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(path);
+			}
+			closedir(dir);
+		}
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+		return;
+	}
+#endif		/* !HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 978a57460a..e622829c0b 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -26,7 +26,7 @@ typedef enum PGFileType
 
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
+extern void fsync_pgdata(const char *pg_data, int serverVersion, bool use_syncfs);
 extern void fsync_dir_recurse(const char *dir);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-- 
2.17.0

#2Michael Paquier
michael@paquier.xyz
In reply to: Justin Pryzby (#1)
Re: should frontend tools use syncfs() ?

On Wed, Sep 29, 2021 at 07:43:41PM -0500, Justin Pryzby wrote:

Forking this thread in which Thomas implemented syncfs for the startup process
(61752afb2).
/messages/by-id/CA+hUKG+SG9jSW3ekwib0cSdC0yD-jReJ21X4bZAmqxoWTLTc2A@mail.gmail.com

Is there any reason that initdb/pg_basebackup/pg_checksums/pg_rewind shouldn't
use syncfs() ?

That makes sense.

do_syncfs() is in src/backend/ so would need to be duplicated^Wimplemented in
common.

The fd handling in the backend makes things tricky if trying to plug
in a common interface, so I'd rather do that as this is frontend-only
code.

They can't use the GUC, so need to add a cmdline option or look at an
environment variable.

fsync_pgdata() is going to manipulate many inodes anyway, because
that's a code path designed to do so. If we know that syncfs() is
just going to be better, I'd rather just call it by default if
available and not add new switches to all the frontend tools in need
of flushing the data folder, switches that are not documented in your
patch.
--
Michael

#3Thomas Munro
thomas.munro@gmail.com
In reply to: Michael Paquier (#2)
Re: should frontend tools use syncfs() ?

On Thu, Sep 30, 2021 at 4:49 PM Michael Paquier <michael@paquier.xyz> wrote:

fsync_pgdata() is going to manipulate many inodes anyway, because
that's a code path designed to do so. If we know that syncfs() is
just going to be better, I'd rather just call it by default if
available and not add new switches to all the frontend tools in need
of flushing the data folder, switches that are not documented in your
patch.

If we want this it should be an option, because it flushes out data
other than the pgdata dir, and it doesn't report errors on old
kernels.

#4Michael Paquier
michael@paquier.xyz
In reply to: Thomas Munro (#3)
Re: should frontend tools use syncfs() ?

On Thu, Sep 30, 2021 at 05:08:24PM +1300, Thomas Munro wrote:

If we want this it should be an option, because it flushes out data
other than the pgdata dir, and it doesn't report errors on old
kernels.

Oh, OK, thanks. That's the part about 5.8. The only option
controlling if sync is used now in those binaries is --no-sync.
Should we use a different design for the option rather than a
--syncfs? Something like --sync={on,off,syncfs,fsync} could be a
possibility, for example.
--
Michael

#5Justin Pryzby
pryzby@telsasoft.com
In reply to: Thomas Munro (#3)
Re: should frontend tools use syncfs() ?

On Thu, Sep 30, 2021 at 05:08:24PM +1300, Thomas Munro wrote:

On Thu, Sep 30, 2021 at 4:49 PM Michael Paquier <michael@paquier.xyz> wrote:

fsync_pgdata() is going to manipulate many inodes anyway, because
that's a code path designed to do so. If we know that syncfs() is
just going to be better, I'd rather just call it by default if
available and not add new switches to all the frontend tools in need
of flushing the data folder, switches that are not documented in your
patch.

If we want this it should be an option, because it flushes out data
other than the pgdata dir, and it doesn't report errors on old
kernels.

I ran into bad performance of initdb --sync-only shortly after adding it to my
db migration script, so added initdb --syncfs.

I found that with sufficiently recent coreutils, I can do what's wanted by calling
/bin/sync -f /datadir

Since it's not integrated into initdb, it's necessary to include each
tablespace and wal.

--
Justin

#6Justin Pryzby
pryzby@telsasoft.com
In reply to: Michael Paquier (#2)
Re: should frontend tools use syncfs() ?

On Thu, Sep 30, 2021 at 12:49:36PM +0900, Michael Paquier wrote:

On Wed, Sep 29, 2021 at 07:43:41PM -0500, Justin Pryzby wrote:

Forking this thread in which Thomas implemented syncfs for the startup process
(61752afb2).
/messages/by-id/CA+hUKG+SG9jSW3ekwib0cSdC0yD-jReJ21X4bZAmqxoWTLTc2A@mail.gmail.com

Is there any reason that initdb/pg_basebackup/pg_checksums/pg_rewind shouldn't
use syncfs() ?

That makes sense.

do_syncfs() is in src/backend/ so would need to be duplicated^Wimplemented in
common.

The fd handling in the backend makes things tricky if trying to plug
in a common interface, so I'd rather do that as this is frontend-only
code.

They can't use the GUC, so need to add a cmdline option or look at an
environment variable.

fsync_pgdata() is going to manipulate many inodes anyway, because
that's a code path designed to do so. If we know that syncfs() is
just going to be better, I'd rather just call it by default if
available and not add new switches to all the frontend tools in need
of flushing the data folder, switches that are not documented in your
patch.

It is a draft/POC, after all.

The argument against using syncfs by default is that it could be worse than
recursive fsync if a tiny 200MB postgres instance lives on a shared filesystem
along with other, larger applications (maybe a larger postgres instance).

There's also an argument that syncfs might be unreliable in the case of a write
error. (But I agreed with Thomas' earlier assessment: that claim carries little
weight since fsync() itself wasn't reliable for 20-some years).

I didn't pursue this patch, as it's easier for me to use /bin/sync -f. Someone
should adopt it if interested.

--
Justin

#7Nathan Bossart
nathandbossart@gmail.com
In reply to: Justin Pryzby (#6)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Wed, Apr 13, 2022 at 06:54:12AM -0500, Justin Pryzby wrote:

I didn't pursue this patch, as it's easier for me to use /bin/sync -f. Someone
should adopt it if interested.

I was about to start a new thread, but I found this one with some good
preliminary discussion. I came to the same conclusion about introducing a
new option instead of using syncfs() by default wherever it is available.
The attached patch is still a work-in-progress, but it seems to behave as
expected. I began investigating this because I noticed that the
sync-data-directory step on pg_upgrade takes quite a while when there are
many files, and I am looking for ways to reduce the amount of downtime
required for pg_upgrade.

The attached patch adds a new --sync-method option to the relevant frontend
utilities, but I am not wedded to that name/approach.

Thoughts?

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v1-0001-allow-syncfs-in-frontend-utilities.patch (text/x-diff; charset=us-ascii) Download
From 4f484cd0268e63da8226d78dd21a8d7e29aa2b78 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v1 1/1] allow syncfs in frontend utilities

---
 src/bin/initdb/initdb.c               | 11 +++-
 src/bin/pg_basebackup/pg_basebackup.c |  8 ++-
 src/bin/pg_checksums/pg_checksums.c   |  8 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  8 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 12 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 79 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 18 ++++++
 src/include/common/file_utils.h       | 15 ++++-
 src/include/fe_utils/option_utils.h   |  3 +
 src/include/storage/fd.h              |  4 ++
 src/tools/pgindent/typedefs.list      |  1 +
 15 files changed, 168 insertions(+), 10 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3f4167682a..908263ee62 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -76,6 +76,7 @@
 #include "common/restricted_token.h"
 #include "common/string.h"
 #include "common/username.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -165,6 +166,7 @@ static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -3106,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3285,6 +3288,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3332,7 +3339,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3409,7 +3416,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..548e764b2f 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -2203,7 +2204,7 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2282,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2454,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..a1dfc51273 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -445,6 +446,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -503,6 +505,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -633,7 +639,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227f..a424762f1e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+SyncMethod	sync_method = SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -131,6 +133,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +221,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..a7ce754880 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern SyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..4f9da8e685 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	SyncMethod	unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 3eea0139c7..13457b2f75 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..b3d814d0c4 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,33 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY | PG_BINARY, 0);
+
+	if (fd < 0)
+	{
+		if (errno == EACCES || errno == EISDIR)
+			return;
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +90,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 SyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +117,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..c65aedf8f5 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,21 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, SyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = SYNC_METHOD_FSYNC;
+#ifdef HAVE_SYNCFS
+	else if (strcmp(optarg, "syncfs") == 0)
+		*sync_method = SYNC_METHOD_SYNCFS;
+#endif
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..fd5162fef1 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -27,12 +27,23 @@ typedef enum PGFileType
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
+
+typedef enum SyncMethod
+{
+	SYNC_METHOD_FSYNC,
+#ifdef HAVE_SYNCFS
+	SYNC_METHOD_SYNCFS
+#endif
+} SyncMethod;
+
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 SyncMethod sync_method);
 extern void fsync_dir_recurse(const char *dir);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-#endif
+
+#endif							/* FRONTEND */
 
 extern PGFileType get_dirent_type(const char *path,
 								  const struct dirent *de,
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..3ad8737219 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,6 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg, SyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..196f20e716 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,6 +43,8 @@
 #ifndef FD_H
 #define FD_H
 
+#ifndef FRONTEND
+
 #include <dirent.h>
 #include <fcntl.h>
 
@@ -195,6 +197,8 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+#endif							/* ! FRONTEND */
+
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 11d47294cf..3bbf70fdf3 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2678,6 +2678,7 @@ SupportRequestSelectivity
 SupportRequestSimplify
 SupportRequestWFuncMonotonic
 Syn
+SyncMethod
 SyncOps
 SyncRepConfigData
 SyncRepStandbyData
-- 
2.25.1

#8Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#7)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Sat, Jul 29, 2023 at 02:40:10PM -0700, Nathan Bossart wrote:

> I was about to start a new thread, but I found this one with some good
> preliminary discussion. I came to the same conclusion about introducing a
> new option instead of using syncfs() by default wherever it is available.
> The attached patch is still a work-in-progress, but it seems to behave as
> expected. I began investigating this because I noticed that the
> sync-data-directory step on pg_upgrade takes quite a while when there are
> many files, and I am looking for ways to reduce the amount of downtime
> required for pg_upgrade.
>
> The attached patch adds a new --sync-method option to the relevant frontend
> utilities, but I am not wedded to that name/approach.

Here is a new version of the patch with documentation updates and a couple
other small improvements.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v2-0001-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 6b65605c90703ee3aebca0b90c771c9f14b59545 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v2 1/1] allow syncfs in frontend utilities

---
 doc/src/sgml/ref/initdb.sgml          | 27 ++++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 30 +++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 27 ++++++++
 doc/src/sgml/ref/pg_dump.sgml         | 27 ++++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 27 ++++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 29 +++++++++
 src/bin/initdb/initdb.c               | 11 +++-
 src/bin/pg_basebackup/pg_basebackup.c | 10 ++-
 src/bin/pg_checksums/pg_checksums.c   |  8 ++-
 src/bin/pg_dump/pg_backup.h           |  4 +-
 src/bin/pg_dump/pg_backup_archiver.c  | 13 ++--
 src/bin/pg_dump/pg_backup_archiver.h  |  1 +
 src/bin/pg_dump/pg_backup_directory.c |  2 +-
 src/bin/pg_dump/pg_dump.c             |  9 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  8 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 12 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 91 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 18 ++++++
 src/include/common/file_utils.h       | 17 ++++-
 src/include/fe_utils/option_utils.h   |  3 +
 src/include/storage/fd.h              |  4 ++
 src/tools/pgindent/typedefs.list      |  1 +
 26 files changed, 369 insertions(+), 21 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..872fef5705 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>initdb</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..af8eb43251 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,36 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_basebackup</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..70fb65a474 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_checksums</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..88139a3064 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  This may be a lot faster than the
+        <literal>fsync</literal> setting, because it doesn't need to open each
+        file one by one.  On the other hand, it may be slower if the file
+        system is shared by other applications that modify a lot of files,
+        since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_dump</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..ed170a4c4c 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_rewind</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..dd2ae6cbc9 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,35 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        This may be a lot faster than the <literal>fsync</literal> setting,
+        because it doesn't need to open each file one by one.  On the other
+        hand, it may be slower if a file system is shared by other applications
+        that modify a lot of files, since those files will also be written to
+        disk.  Furthermore, on versions of Linux before 5.8, I/O errors
+        encountered while writing data to disk may not be reported to
+        <command>pg_upgrade</command>, and relevant error messages may appear
+        only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3f4167682a..908263ee62 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -76,6 +76,7 @@
 #include "common/restricted_token.h"
 #include "common/string.h"
 #include "common/username.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -165,6 +166,7 @@ static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -3106,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3285,6 +3288,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3332,7 +3339,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3409,7 +3416,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..3443a91956 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2282,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2454,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..a1dfc51273 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -445,6 +446,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -503,6 +505,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -633,7 +639,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..7a2bb92938 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  SyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..979db4bba6 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   SyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  SyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,7 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker, SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2235,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, SyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2289,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..e32e00547b 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	SyncMethod	sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 5dab1ba9ea..fec1b6b3a9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	SyncMethod	sync_method = SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227f..a424762f1e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+SyncMethod	sync_method = SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -131,6 +133,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +221,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..a7ce754880 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern SyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..4f9da8e685 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	SyncMethod	unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 3eea0139c7..13457b2f75 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..8aa7784b5f 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 SyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
@@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, SyncMethod sync_method)
 {
+#ifdef HAVE_SYNCFS
+	if (sync_method == SYNC_METHOD_SYNCFS)
+	{
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync the whole filesystem.
+		 */
+		do_syncfs(dir);
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..c65aedf8f5 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,21 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, SyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = SYNC_METHOD_FSYNC;
+#ifdef HAVE_SYNCFS
+	else if (strcmp(optarg, "syncfs") == 0)
+		*sync_method = SYNC_METHOD_SYNCFS;
+#endif
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..3044f7f742 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -27,12 +27,23 @@ typedef enum PGFileType
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
+
+typedef enum SyncMethod
+{
+	SYNC_METHOD_FSYNC,
+#ifdef HAVE_SYNCFS
+	SYNC_METHOD_SYNCFS
+#endif
+} SyncMethod;
+
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 SyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, SyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-#endif
+
+#endif							/* FRONTEND */
 
 extern PGFileType get_dirent_type(const char *path,
 								  const struct dirent *de,
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..3ad8737219 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,6 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg, SyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..196f20e716 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,6 +43,8 @@
 #ifndef FD_H
 #define FD_H
 
+#ifndef FRONTEND
+
 #include <dirent.h>
 #include <fcntl.h>
 
@@ -195,6 +197,8 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+#endif							/* ! FRONTEND */
+
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ab97f1794a..0635ce4a87 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2678,6 +2678,7 @@ SupportRequestSelectivity
 SupportRequestSimplify
 SupportRequestWFuncMonotonic
 Syn
+SyncMethod
 SyncOps
 SyncRepConfigData
 SyncRepStandbyData
-- 
2.25.1

#9Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#8)
Re: should frontend tools use syncfs() ?

On Mon, Jul 31, 2023 at 10:51:38AM -0700, Nathan Bossart wrote:

Here is a new version of the patch with documentation updates and a couple
other small improvements.

I just realized I forgot to update the --help output for these utilities.
I'll do that in the next version of the patch.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#10Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#9)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Mon, Jul 31, 2023 at 11:39:46AM -0700, Nathan Bossart wrote:

I just realized I forgot to update the --help output for these utilities.
I'll do that in the next version of the patch.

Done in v3. Sorry for the noise.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v3-0001-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From be52e4625754fa36703423e8b67d2f6cd2a16a09 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v3 1/1] allow syncfs in frontend utilities

---
 doc/src/sgml/ref/initdb.sgml          | 27 ++++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 30 +++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 27 ++++++++
 doc/src/sgml/ref/pg_dump.sgml         | 27 ++++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 27 ++++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 29 +++++++++
 src/bin/initdb/initdb.c               | 12 +++-
 src/bin/pg_basebackup/pg_basebackup.c | 12 +++-
 src/bin/pg_checksums/pg_checksums.c   |  9 ++-
 src/bin/pg_dump/pg_backup.h           |  4 +-
 src/bin/pg_dump/pg_backup_archiver.c  | 13 ++--
 src/bin/pg_dump/pg_backup_archiver.h  |  1 +
 src/bin/pg_dump/pg_backup_directory.c |  2 +-
 src/bin/pg_dump/pg_dump.c             | 10 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  9 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 13 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 91 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 18 ++++++
 src/include/common/file_utils.h       | 17 ++++-
 src/include/fe_utils/option_utils.h   |  3 +
 src/include/storage/fd.h              |  4 ++
 src/tools/pgindent/typedefs.list      |  1 +
 26 files changed, 376 insertions(+), 21 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..872fef5705 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>initdb</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..af8eb43251 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,36 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_basebackup</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..70fb65a474 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_checksums</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..88139a3064 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  This may be a lot faster than the
+        <literal>fsync</literal> setting, because it doesn't need to open each
+        file one by one.  On the other hand, it may be slower if the file
+        system is shared by other applications that modify a lot of files,
+        since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_dump</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..ed170a4c4c 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_rewind</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..dd2ae6cbc9 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,35 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        This may be a lot faster than the <literal>fsync</literal> setting,
+        because it doesn't need to open each file one by one.  On the other
+        hand, it may be slower if a file system is shared by other applications
+        that modify a lot of files, since those files will also be written to
+        disk.  Furthermore, on versions of Linux before 5.8, I/O errors
+        encountered while writing data to disk may not be reported to
+        <command>pg_upgrade</command>, and relevant error messages may appear
+        only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3f4167682a..bfbe5bd3fd 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -76,6 +76,7 @@
 #include "common/restricted_token.h"
 #include "common/string.h"
 #include "common/username.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -165,6 +166,7 @@ static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -2466,6 +2468,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3106,6 +3109,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3285,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3332,7 +3340,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3409,7 +3417,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..51f29f83fd 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -424,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..3ddbffd471 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static SyncMethod sync_method = SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -87,6 +88,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -445,6 +447,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -503,6 +506,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -633,7 +640,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..7a2bb92938 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  SyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..979db4bba6 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   SyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  SyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,7 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker, SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2235,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, SyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2289,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..e32e00547b 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	SyncMethod	sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 5dab1ba9ea..b8117fe536 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	SyncMethod	sync_method = SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
@@ -1068,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227f..25cffb897d 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+SyncMethod	sync_method = SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..a7ce754880 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern SyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..44675131a2 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	SyncMethod	unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 3eea0139c7..13457b2f75 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..8aa7784b5f 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 SyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
@@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, SyncMethod sync_method)
 {
+#ifdef HAVE_SYNCFS
+	if (sync_method == SYNC_METHOD_SYNCFS)
+	{
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync the whole filesystem.
+		 */
+		do_syncfs(dir);
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..c65aedf8f5 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,21 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, SyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = SYNC_METHOD_FSYNC;
+#ifdef HAVE_SYNCFS
+	else if (strcmp(optarg, "syncfs") == 0)
+		*sync_method = SYNC_METHOD_SYNCFS;
+#endif
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..3044f7f742 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -27,12 +27,23 @@ typedef enum PGFileType
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
+
+typedef enum SyncMethod
+{
+	SYNC_METHOD_FSYNC,
+#ifdef HAVE_SYNCFS
+	SYNC_METHOD_SYNCFS
+#endif
+} SyncMethod;
+
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 SyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, SyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-#endif
+
+#endif							/* FRONTEND */
 
 extern PGFileType get_dirent_type(const char *path,
 								  const struct dirent *de,
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..3ad8737219 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,6 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg, SyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..196f20e716 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,6 +43,8 @@
 #ifndef FD_H
 #define FD_H
 
+#ifndef FRONTEND
+
 #include <dirent.h>
 #include <fcntl.h>
 
@@ -195,6 +197,8 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+#endif							/* ! FRONTEND */
+
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 66823bc2a7..bebb2b8e49 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2677,6 +2677,7 @@ SupportRequestSelectivity
 SupportRequestSimplify
 SupportRequestWFuncMonotonic
 Syn
+SyncMethod
 SyncOps
 SyncRepConfigData
 SyncRepStandbyData
-- 
2.25.1

#11Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#10)
Re: should frontend tools use syncfs() ?

I ran a couple of tests for pg_upgrade with 100k tables (created using the
script here [0]) in order to demonstrate the potential benefits of this
patch.

pg_upgrade --sync-method fsync
real 5m50.072s
user 0m10.606s
sys 0m40.298s

pg_upgrade --sync-method syncfs
real 3m44.096s
user 0m8.906s
sys 0m26.398s

pg_upgrade --no-sync
real 3m27.697s
user 0m9.056s
sys 0m26.605s

[0]: /messages/by-id/3612876.1689443232@sss.pgh.pa.us

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#12Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#11)
Re: should frontend tools use syncfs() ?

On Tue, Aug 08, 2023 at 01:06:06PM -0700, Nathan Bossart wrote:

I ran a couple of tests for pg_upgrade with 100k tables (created using the
script here [0]) in order to demonstrate the potential benefits of this
patch.

That shows some nice numbers with many files, indeed. How does the
size of each file influence the difference in time?

+        else
+        {
+            while (errno = 0, (de = readdir(dir)) != NULL)
+            {
+                char        subpath[MAXPGPATH * 2];
+
+                if (strcmp(de->d_name, ".") == 0 ||
+                    strcmp(de->d_name, "..") == 0)
+                    continue;

It seems to me that there is no need to do that for in-place
tablespaces. There are relative paths in pg_tblspc/, so they would be
taken care of by the syncfs() done on the main data folder.

This does not really check if the mount points of each tablespace is
different, as well. For example, imagine that you have two
tablespaces within the same disk, syncfs() twice. Perhaps, the
current logic is OK anyway as long as the behavior is optional, but it
should be explained in the docs, at least.

I'm finding a bit confusing that fsync_pgdata() is coded in such a way
that it does a silent fallback to the cascading syncs through
walkdir() when syncfs is specified but not available in the build.
Perhaps an error is more helpful because one would then know that they
are trying something that's not around?

+       pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+       (void) close(fd);
+       exit(EXIT_FAILURE);

walkdir() reports errors and does not consider these fatal. Why the
early exit()?

I am a bit concerned about the amount of duplication this patch
introduces in the docs. Perhaps this had better be moved into a new
section of the docs to explain the tradeoffs, with each tool linking
to it?

Do we actually need --no-sync at all if --sync-method is around? We
could have an extra --sync-method=none at option level with --no-sync
still around mainly for compatibility? Or perhaps that's just
over-designing things?
--
Michael

#13Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#12)
Re: should frontend tools use syncfs() ?

Thanks for taking a look.

On Wed, Aug 16, 2023 at 08:10:10AM +0900, Michael Paquier wrote:

On Tue, Aug 08, 2023 at 01:06:06PM -0700, Nathan Bossart wrote:

I ran a couple of tests for pg_upgrade with 100k tables (created using the
script here [0]) in order to demonstrate the potential benefits of this
patch.

That shows some nice numbers with many files, indeed. How does the
size of each file influence the difference in time?

IME the number of files tends to influence the duration much more than the
size. I assume this is because most files are already sync'd in these code
paths that loop through every file.

+        else
+        {
+            while (errno = 0, (de = readdir(dir)) != NULL)
+            {
+                char        subpath[MAXPGPATH * 2];
+
+                if (strcmp(de->d_name, ".") == 0 ||
+                    strcmp(de->d_name, "..") == 0)
+                    continue;

It seems to me that there is no need to do that for in-place
tablespaces. There are relative paths in pg_tblspc/, so they would be
taken care of by the syncfs() done on the main data folder.

This does not really check if the mount points of each tablespace is
different, as well. For example, imagine that you have two
tablespaces within the same disk, syncfs() twice. Perhaps, the
current logic is OK anyway as long as the behavior is optional, but it
should be explained in the docs, at least.

True. But I don't know if checking the mount point of each tablespace is
worth the complexity. In the worst case, we'll call syncfs() on the same
file system a few times, which is probably still much faster in most cases.
FWIW this is what recovery_init_sync_method does today, and I'm not aware
of any complaints about this behavior.

The patch does have the following note:

+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.

Do you think that is sufficient, or do you think we should really clearly
explain that you could end up calling syncfs() on the same file system a
few times if your tablespaces are on the same disk? I personally feel
like that'd be a bit too verbose for the already lengthy descriptions of
this setting.

I'm finding a bit confusing that fsync_pgdata() is coded in such a way
that it does a silent fallback to the cascading syncs through
walkdir() when syncfs is specified but not available in the build.
Perhaps an error is more helpful because one would then know that they
are trying something that's not around?

If syncfs() is not available, SYNC_METHOD_SYNCFS won't even be defined, and
parse_sync_method() should fail if "syncfs" is specified. Furthermore, the
relevant part of fsync_pgdata() won't be compiled in whenever HAVE_SYNCFS
is not defined.

+       pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+       (void) close(fd);
+       exit(EXIT_FAILURE);

walkdir() reports errors and does not consider these fatal. Why the
early exit()?

I know it claims to, but fsync_fname() does exit when fsync() fails:

returncode = fsync(fd);

/*
* Some OSes don't allow us to fsync directories at all, so we can ignore
* those errors. Anything else needs to be reported.
*/
if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
{
pg_log_error("could not fsync file \"%s\": %m", fname);
(void) close(fd);
exit(EXIT_FAILURE);
}

I suspect that the current code does not treat failures for things like
open() as fatal because it's likely due to a lack of permissions on the
file, but it does treat failures to fsync() as fatal because it is more
likely to indicate that something is very wrong. I don't know whether this
reasoning is sound, but I tried to match the current convention in the
syncfs() code.

I am a bit concerned about the amount of duplication this patch
introduces in the docs. Perhaps this had better be moved into a new
section of the docs to explain the tradeoffs, with each tool linking
to it?

Yeah, this crossed my mind. Do you know of any existing examples of
options with links to a common section? One problem with this approach is
that there are small differences in the wording for some of the frontend
utilities, so it might be difficult to cleanly unite these sections.

Do we actually need --no-sync at all if --sync-method is around? We
could have an extra --sync-method=none at option level with --no-sync
still around mainly for compatibility? Or perhaps that's just
over-designing things?

I don't have a strong opinion. We could take up deprecating --no-sync in a
follow-up thread, though. Like you said, we'll probably need to keep it
around for backward compatibility, so it might not be worth the trouble.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#14Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#13)
Re: should frontend tools use syncfs() ?

On Wed, Aug 16, 2023 at 08:17:05AM -0700, Nathan Bossart wrote:

On Wed, Aug 16, 2023 at 08:10:10AM +0900, Michael Paquier wrote:

+       pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+       (void) close(fd);
+       exit(EXIT_FAILURE);

walkdir() reports errors and does not consider these fatal. Why the
early exit()?

I know it claims to, but fsync_fname() does exit when fsync() fails:

returncode = fsync(fd);

/*
* Some OSes don't allow us to fsync directories at all, so we can ignore
* those errors. Anything else needs to be reported.
*/
if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
{
pg_log_error("could not fsync file \"%s\": %m", fname);
(void) close(fd);
exit(EXIT_FAILURE);
}

I suspect that the current code does not treat failures for things like
open() as fatal because it's likely due to a lack of permissions on the
file, but it does treat failures to fsync() as fatal because it is more
likely to indicate that something is very wrong. I don't know whether this
reasoning is sound, but I tried to match the current convention in the
syncfs() code.

Ah, it looks like this code used to treat fsync() errors as non-fatal, but
it was changed in commit 1420617. I still find it a bit strange that some
errors that prevent a file from being sync'd are non-fatal while others
_are_ fatal, but that is probably a topic for another thread.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#15Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#14)
Re: should frontend tools use syncfs() ?

On Wed, Aug 16, 2023 at 08:23:25AM -0700, Nathan Bossart wrote:

Ah, it looks like this code used to treat fsync() errors as non-fatal, but
it was changed in commit 1420617. I still find it a bit strange that some
errors that prevent a file from being sync'd are non-fatal while others
_are_ fatal, but that is probably a topic for another thread.

Right. That rings a bell.
--
Michael

#16Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#13)
Re: should frontend tools use syncfs() ?

On Wed, Aug 16, 2023 at 08:17:05AM -0700, Nathan Bossart wrote:

On Wed, Aug 16, 2023 at 08:10:10AM +0900, Michael Paquier wrote:

On Tue, Aug 08, 2023 at 01:06:06PM -0700, Nathan Bossart wrote:
+        else
+        {
+            while (errno = 0, (de = readdir(dir)) != NULL)
+            {
+                char        subpath[MAXPGPATH * 2];
+
+                if (strcmp(de->d_name, ".") == 0 ||
+                    strcmp(de->d_name, "..") == 0)
+                    continue;

It seems to me that there is no need to do that for in-place
tablespaces. There are relative paths in pg_tblspc/, so they would be
taken care of by the syncfs() done on the main data folder.

This does not really check if the mount points of each tablespace is
different, as well. For example, imagine that you have two
tablespaces within the same disk, syncfs() twice. Perhaps, the
current logic is OK anyway as long as the behavior is optional, but it
should be explained in the docs, at least.

True. But I don't know if checking the mount point of each tablespace is
worth the complexity.

Perhaps worth a note, this would depend on statvfs(), which is not
that portable the last time I looked at it (NetBSD, some BSD-ish? And
of course WIN32).

In the worst case, we'll call syncfs() on the same
file system a few times, which is probably still much faster in most cases.
FWIW this is what recovery_init_sync_method does today, and I'm not aware
of any complaints about this behavior.

Hmm. Okay.

The patch does have the following note:

+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.

Do you think that is sufficient, or do you think we should really clearly
explain that you could end up calling syncfs() on the same file system a
few times if your tablespaces are on the same disk? I personally feel
like that'd be a bit too verbose for the already lengthy descriptions of
this setting.

It does not hurt to mention that the code syncfs()-es each tablespace
path (not in-place tablespaces), ignoring locations that share the
same mounting point, IMO. For that, we'd better rely on
get_dirent_type() like the normal sync path.

I'm finding a bit confusing that fsync_pgdata() is coded in such a way
that it does a silent fallback to the cascading syncs through
walkdir() when syncfs is specified but not available in the build.
Perhaps an error is more helpful because one would then know that they
are trying something that's not around?

If syncfs() is not available, SYNC_METHOD_SYNCFS won't even be defined, and
parse_sync_method() should fail if "syncfs" is specified. Furthermore, the
relevant part of fsync_pgdata() won't be compiled in whenever HAVE_SYNCFS
is not defined.

That feels structurally inconsistent with what we do with other
option sets that have library dependencies. For example, look at
compression.h and what happens for pg_compress_algorithm. So, it
seems to me that it would be more friendly to list SYNC_METHOD_SYNCFS
all the time in SyncMethod even if HAVE_SYNCFS is not around, and at
least generate a warning rather than having a platform-dependent set
of options?

SyncMethod may be a bit too generic as name for the option structure.
How about a PGSyncMethod or pg_sync_method?

I am a bit concerned about the amount of duplication this patch
introduces in the docs. Perhaps this had better be moved into a new
section of the docs to explain the tradeoffs, with each tool linking
to it?

Yeah, this crossed my mind. Do you know of any existing examples of
options with links to a common section? One problem with this approach is
that there are small differences in the wording for some of the frontend
utilities, so it might be difficult to cleanly unite these sections.

The closest thing I can think of is Color Support in section
Appendixes, that describes something shared across a lot of binaries
(that would be 6 tools with this patch).

Do we actually need --no-sync at all if --sync-method is around? We
could have an extra --sync-method=none at option level with --no-sync
still around mainly for compatibility? Or perhaps that's just
over-designing things?

I don't have a strong opinion. We could take up deprecating --no-sync in a
follow-up thread, though. Like you said, we'll probably need to keep it
around for backward compatibility, so it might not be worth the trouble.

Okay, maybe that's not worth it.
--
Michael

#17Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#16)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Thu, Aug 17, 2023 at 12:50:31PM +0900, Michael Paquier wrote:

On Wed, Aug 16, 2023 at 08:17:05AM -0700, Nathan Bossart wrote:

The patch does have the following note:

+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.

Do you think that is sufficient, or do you think we should really clearly
explain that you could end up calling syncfs() on the same file system a
few times if your tablespaces are on the same disk? I personally feel
like that'd be a bit too verbose for the already lengthy descriptions of
this setting.

It does not hurt to mention that the code syncfs()-es each tablespace
path (not in-place tablespaces), ignoring locations that share the
same mounting point, IMO. For that, we'd better rely on
get_dirent_type() like the normal sync path.

But it doesn't ignore tablespace locations that share the same mount point.
It simply calls syncfs() for each tablespace path, just like
recovery_init_sync_method.

If syncfs() is not available, SYNC_METHOD_SYNCFS won't even be defined, and
parse_sync_method() should fail if "syncfs" is specified. Furthermore, the
relevant part of fsync_pgdata() won't be compiled in whenever HAVE_SYNCFS
is not defined.

That feels structurally inconsistent with what we do with other
option sets that have library dependencies. For example, look at
compression.h and what happens for pg_compress_algorithm. So, it
seems to me that it would be more friendly to list SYNC_METHOD_SYNCFS
all the time in SyncMethod even if HAVE_SYNCFS is not around, and at
least generate a warning rather than having a platform-dependent set
of options?

Done.

SyncMethod may be a bit too generic as name for the option structure.
How about a PGSyncMethod or pg_sync_method?

In v4, I renamed this to DataDirSyncMethod and merged it with
RecoveryInitSyncMethod. I'm not wedded to the name, but that seemed
generic enough for both use-cases. As an aside, we need to be careful to
distinguish these options from those for wal_sync_method.

Yeah, this crossed my mind. Do you know of any existing examples of
options with links to a common section? One problem with this approach is
that there are small differences in the wording for some of the frontend
utilities, so it might be difficult to cleanly unite these sections.

The closest thing I can think of is Color Support in section
Appendixes, that describes something shared across a lot of binaries
(that would be 6 tools with this patch).

If I added a "syncfs() Caveats" appendix for the common parts of the docs,
it would only say something like the following:

Using syncfs may be a lot faster than using fsync, because it doesn't
need to open each file one by one. On the other hand, it may be slower
if a file system is shared by other applications that modify a lot of
files, since those files will also be written to disk. Furthermore, on
versions of Linux before 5.8, I/O errors encountered while writing data
to disk may not be reported to the calling program, and relevant error
messages may appear only in kernel logs.

Does that seem reasonable? It would reduce the duplication a little bit,
but I'm not sure it's really much of an improvement in this case.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v4-0001-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 5edd4b4570617a897043086cf203af57a5596e11 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v4 1/1] allow syncfs in frontend utilities

---
 doc/src/sgml/ref/initdb.sgml          | 27 ++++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 30 +++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 27 ++++++++
 doc/src/sgml/ref/pg_dump.sgml         | 27 ++++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 27 ++++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 29 +++++++++
 src/backend/storage/file/fd.c         |  4 +-
 src/backend/utils/misc/guc_tables.c   |  7 ++-
 src/bin/initdb/initdb.c               | 12 +++-
 src/bin/pg_basebackup/pg_basebackup.c | 12 +++-
 src/bin/pg_checksums/pg_checksums.c   |  9 ++-
 src/bin/pg_dump/pg_backup.h           |  4 +-
 src/bin/pg_dump/pg_backup_archiver.c  | 14 +++--
 src/bin/pg_dump/pg_backup_archiver.h  |  1 +
 src/bin/pg_dump/pg_backup_directory.c |  2 +-
 src/bin/pg_dump/pg_dump.c             | 10 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  9 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 13 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 91 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 24 +++++++
 src/include/common/file_utils.h       | 15 ++++-
 src/include/fe_utils/option_utils.h   |  4 ++
 src/include/storage/fd.h              | 10 ++-
 src/tools/pgindent/typedefs.list      |  1 +
 28 files changed, 388 insertions(+), 32 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..872fef5705 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>initdb</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..af8eb43251 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,36 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_basebackup</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..70fb65a474 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_checksums</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..88139a3064 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  This may be a lot faster than the
+        <literal>fsync</literal> setting, because it doesn't need to open each
+        file one by one.  On the other hand, it may be slower if the file
+        system is shared by other applications that modify a lot of files,
+        since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_dump</command>, and relevant
+        error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..ed170a4c4c 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  This may be a lot
+        faster than the <literal>fsync</literal> setting, because it doesn't
+        need to open each file one by one.  On the other hand, it may be slower
+        if a file system is shared by other applications that modify a lot of
+        files, since those files will also be written to disk.  Furthermore, on
+        versions of Linux before 5.8, I/O errors encountered while writing data
+        to disk may not be reported to <command>pg_rewind</command>, and
+        relevant error messages may appear only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..dd2ae6cbc9 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,35 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        This may be a lot faster than the <literal>fsync</literal> setting,
+        because it doesn't need to open each file one by one.  On the other
+        hand, it may be slower if a file system is shared by other applications
+        that modify a lot of files, since those files will also be written to
+        disk.  Furthermore, on versions of Linux before 5.8, I/O errors
+        encountered while writing data to disk may not be reported to
+        <command>pg_upgrade</command>, and relevant error messages may appear
+        only in kernel logs.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a027a8aabc..206db91217 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f9dba43b8c..331f65cbe6 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3f4167682a..6d7d10cffb 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -76,6 +76,7 @@
 #include "common/restricted_token.h"
 #include "common/string.h"
 #include "common/username.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -165,6 +166,7 @@ static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -2466,6 +2468,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3106,6 +3109,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3285,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3332,7 +3340,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3409,7 +3417,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -424,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..adcc767b0f 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -87,6 +88,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -445,6 +447,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -503,6 +506,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -633,7 +640,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 5dab1ba9ea..895360dac7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
@@ -1068,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227f..64a7469f22 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 3eea0139c7..13457b2f75 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..c977c990ad 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
@@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync the whole filesystem.
+		 */
+		do_syncfs(dir);
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..93aa5bdaf8 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -26,13 +26,22 @@ typedef enum PGFileType
 
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 #ifdef FRONTEND
+
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-#endif
+
+#endif							/* FRONTEND */
 
 extern PGFileType get_dirent_type(const char *path,
 								  const struct dirent *de,
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..dedb773304 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,15 +43,11 @@
 #ifndef FD_H
 #define FD_H
 
+#ifndef FRONTEND
+
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
@@ -195,6 +191,8 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+#endif							/* ! FRONTEND */
+
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 51b7951ad8..07387ed251 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2678,6 +2678,7 @@ SupportRequestSelectivity
 SupportRequestSimplify
 SupportRequestWFuncMonotonic
 Syn
+DataDirSyncMethod
 SyncOps
 SyncRepConfigData
 SyncRepStandbyData
-- 
2.25.1

#18Robert Haas
robertmhaas@gmail.com
In reply to: Michael Paquier (#16)
Re: should frontend tools use syncfs() ?

On Wed, Aug 16, 2023 at 11:50 PM Michael Paquier <michael@paquier.xyz> wrote:

Do we actually need --no-sync at all if --sync-method is around? We
could have an extra --sync-method=none at option level with --no-sync
still around mainly for compatibility? Or perhaps that's just
over-designing things?

I don't have a strong opinion. We could take up deprecating --no-sync in a
follow-up thread, though. Like you said, we'll probably need to keep it
around for backward compatibility, so it might not be worth the trouble.

Okay, maybe that's not worth it.

Doesn't seem worth it to me. I think --no-sync is more intuitive than
--sync-method=none, it's certainly shorter, and it's a pretty
important setting because we use it when running the regression tests.

--
Robert Haas
EDB: http://www.enterprisedb.com

#19Michael Paquier
michael@paquier.xyz
In reply to: Robert Haas (#18)
Re: should frontend tools use syncfs() ?

On Mon, Aug 21, 2023 at 04:08:46PM -0400, Robert Haas wrote:

Doesn't seem worth it to me. I think --no-sync is more intuitive than
--sync-method=none, it's certainly shorter, and it's a pretty
important setting because we use it when running the regression tests.

No arguments against that ;)
--
Michael

#20Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#17)
Re: should frontend tools use syncfs() ?

On Fri, Aug 18, 2023 at 09:01:11AM -0700, Nathan Bossart wrote:

On Thu, Aug 17, 2023 at 12:50:31PM +0900, Michael Paquier wrote:

SyncMethod may be a bit too generic as name for the option structure.
How about a PGSyncMethod or pg_sync_method?

In v4, I renamed this to DataDirSyncMethod and merged it with
RecoveryInitSyncMethod. I'm not wedded to the name, but that seemed
generic enough for both use-cases. As an aside, we need to be careful to
distinguish these options from those for wal_sync_method.

Okay.

Yeah, this crossed my mind. Do you know of any existing examples of
options with links to a common section? One problem with this approach is
that there are small differences in the wording for some of the frontend
utilities, so it might be difficult to cleanly unite these sections.

The closest thing I can think of is Color Support in section
Appendixes, that describes something shared across a lot of binaries
(that would be 6 tools with this patch).

If I added a "syncfs() Caveats" appendix for the common parts of the docs,
it would only say something like the following:

Using syncfs may be a lot faster than using fsync, because it doesn't
need to open each file one by one. On the other hand, it may be slower
if a file system is shared by other applications that modify a lot of
files, since those files will also be written to disk. Furthermore, on
versions of Linux before 5.8, I/O errors encountered while writing data
to disk may not be reported to the calling program, and relevant error
messages may appear only in kernel logs.

Does that seem reasonable? It would reduce the duplication a little bit,
but I'm not sure it's really much of an improvement in this case.

This would cut 60% (?) of the documentation added by the patch for
these six tools, so that looks like an improvement to me. Perhaps
others may disagree, so more opinions are welcome.

--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,15 +43,11 @@
 #ifndef FD_H
 #define FD_H
+#ifndef FRONTEND
+
 #include <dirent.h>
 #include <fcntl.h>

Ugh. So you need this part because pg_rewind's filemap.c includes
fd.h, and pg_rewind also needs file_utils.h. This is not the fault of
your patch, but this does not make the situation better, either. It
looks like we need to think harder about this layer. An improvement
would be to split file_utils.c so that its frontend-only code is moved
to OBJS_FRONTEND in a new file with a new header? It should be OK to
keep DataDirSyncMethod in file_utils.h as long as the split is clean.
--
Michael

#21Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#20)
Re: should frontend tools use syncfs() ?

On Tue, Aug 22, 2023 at 08:56:26AM +0900, Michael Paquier wrote:

--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,15 +43,11 @@
#ifndef FD_H
#define FD_H
+#ifndef FRONTEND
+
#include <dirent.h>
#include <fcntl.h>

Ugh. So you need this part because pg_rewind's filemap.c includes
fd.h, and pg_rewind also needs file_utils.h. This is not the fault of
your patch, but this does not make the situation better, either.. It
looks like we need to think harder about this layer. An improvement
would be to split file_utils.c so as its frontend-only code is moved
to OBJS_FRONTEND in a new file with a new header? It should be OK to
keep DataDirSyncMethod in file_utils.h as long as the split is clean.

I'm hoping there's a simpler path forward here. pg_rewind only needs the
following lines from fd.h:

/* Filename components */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"

Maybe we could move these to file_utils.h instead. WDYT?

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#22Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#21)
Re: should frontend tools use syncfs() ?

On Mon, Aug 21, 2023 at 06:44:07PM -0700, Nathan Bossart wrote:

I'm hoping there's a simpler path forward here. pg_rewind only needs the
following lines from fd.h:

/* Filename components */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"

Maybe we could move these to file_utils.h instead. WDYT?

I guess so. At the same time, something can be said about
pg_checksums, which redeclares PG_TEMP_FILE_PREFIX and PG_TEMP_FILES_DIR
because it does not want to include fd.h and its sync routines.
--
Michael

#23Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#22)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Tue, Aug 22, 2023 at 10:50:01AM +0900, Michael Paquier wrote:

On Mon, Aug 21, 2023 at 06:44:07PM -0700, Nathan Bossart wrote:

I'm hoping there's a simpler path forward here. pg_rewind only needs the
following lines from fd.h:

/* Filename components */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"

Maybe we could move these to file_utils.h instead. WDYT?

I guess so.. At the same time, something can be said about
pg_checksums that redeclares PG_TEMP_FILE_PREFIX and PG_TEMP_FILES_DIR
because it does not want to include fd.h and its sync routines.

This would look something like the attached patch. I think this is nicer.
With this patch, we don't have to choose between including fd.h or
redefining the macros in the frontend code.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From af40f64933b5d8829b7e1641ce57e4ef8fd5a2c2 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH 1/1] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 6 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

#24Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#23)
Re: should frontend tools use syncfs() ?

On Mon, Aug 21, 2023 at 07:06:32PM -0700, Nathan Bossart wrote:

This would look something like the attached patch. I think this is nicer.
With this patch, we don't have to choose between including fd.h or
redefining the macros in the frontend code.

Yes, this one is moving the needle in the good direction. +1.
--
Michael

#25Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#24)
2 attachment(s)
Re: should frontend tools use syncfs() ?

On Tue, Aug 22, 2023 at 12:53:53PM +0900, Michael Paquier wrote:

On Mon, Aug 21, 2023 at 07:06:32PM -0700, Nathan Bossart wrote:

This would look something like the attached patch. I think this is nicer.
With this patch, we don't have to choose between including fd.h or
redefining the macros in the frontend code.

Yes, this one is moving the needle in the good direction. +1.

Great. Here is a new patch set that includes this change as well as the
suggested documentation updates.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v5-0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From 25357aefb8956e12b4555881346c5b4a73c1f4e5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH v5 1/2] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/postmaster/postmaster.c |  1 +
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 9c8ec779f9..d375dcb795 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -94,6 +94,7 @@
 #include "access/xlogrecovery.h"
 #include "catalog/pg_control.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/ip.h"
 #include "common/pg_prng.h"
 #include "common/string.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

v5-0002-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 4cc8395720a3af1916f3baada04826177783ec80 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v5 2/2] allow syncfs in frontend utilities

---
 doc/src/sgml/config.sgml              | 12 +---
 doc/src/sgml/filelist.sgml            |  1 +
 doc/src/sgml/postgres.sgml            |  1 +
 doc/src/sgml/ref/initdb.sgml          | 22 +++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 25 ++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 22 +++++++
 doc/src/sgml/ref/pg_dump.sgml         | 21 +++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 22 +++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 23 +++++++
 doc/src/sgml/syncfs.sgml              | 36 +++++++++++
 src/backend/storage/file/fd.c         |  4 +-
 src/backend/utils/misc/guc_tables.c   |  7 ++-
 src/bin/initdb/initdb.c               | 12 +++-
 src/bin/pg_basebackup/pg_basebackup.c | 12 +++-
 src/bin/pg_checksums/pg_checksums.c   |  9 ++-
 src/bin/pg_dump/pg_backup.h           |  4 +-
 src/bin/pg_dump/pg_backup_archiver.c  | 14 +++--
 src/bin/pg_dump/pg_backup_archiver.h  |  1 +
 src/bin/pg_dump/pg_backup_directory.c |  2 +-
 src/bin/pg_dump/pg_dump.c             | 10 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  9 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 13 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 91 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 24 +++++++
 src/include/common/file_utils.h       | 11 +++-
 src/include/fe_utils/option_utils.h   |  4 ++
 src/include/storage/fd.h              |  6 --
 src/tools/pgindent/typedefs.list      |  1 +
 32 files changed, 390 insertions(+), 40 deletions(-)
 create mode 100644 doc/src/sgml/syncfs.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 11251fa05e..77b008b854 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         On Linux, <literal>syncfs</literal> may be used instead, to ask the
         operating system to synchronize the whole file systems that contain the
         data directory, the WAL files and each tablespace (but not any other
-        file systems that may be reachable through symbolic links).  This may
-        be a lot faster than the <literal>fsync</literal> setting, because it
-        doesn't need to open each file one by one.  On the other hand, it may
-        be slower if a file system is shared by other applications that
-        modify a lot of files, since those files will also be written to disk.
-        Furthermore, on versions of Linux before 5.8, I/O errors encountered
-        while writing data to disk may not be reported to
-        <productname>PostgreSQL</productname>, and relevant error messages may
-        appear only in kernel logs.
+        file systems that may be reachable through symbolic links).  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
        </para>
        <para>
         This parameter can only be set in the
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 63b0fc2a46..df5d7c3025 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -184,6 +184,7 @@
 <!ENTITY acronyms   SYSTEM "acronyms.sgml">
 <!ENTITY glossary   SYSTEM "glossary.sgml">
 <!ENTITY color      SYSTEM "color.sgml">
+<!ENTITY syncfs     SYSTEM "syncfs.sgml">
 
 <!ENTITY features-supported   SYSTEM "features-supported.sgml">
 <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml">
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 2e271862fc..f629524be0 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -294,6 +294,7 @@ break is not needed in a wider output rendering.
   &acronyms;
   &glossary;
   &color;
+  &syncfs;
   &obsolete;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..8a09c5c438 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..d2b8ddd200 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,31 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..7b44ba71cf 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..c1e2220b3c 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,27 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  See <xref linkend="syncfs"/> for more information
+        about using <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..80dff16168 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..6c033485ea 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,29 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        See <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml
new file mode 100644
index 0000000000..00457d2457
--- /dev/null
+++ b/doc/src/sgml/syncfs.sgml
@@ -0,0 +1,36 @@
+<!-- doc/src/sgml/syncfs.sgml -->
+
+<appendix id="syncfs">
+ <title><function>syncfs()</function> Caveats</title>
+
+ <indexterm zone="syncfs">
+  <primary>syncfs</primary>
+ </indexterm>
+
+ <para>
+  On Linux, <function>syncfs()</function> may be specified for some
+  configuration parameters (e.g.,
+  <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g.,
+  <application>pg_upgrade</application>), and client applications (e.g.,
+  <application>pg_basebackup</application>) that involve synchronizing many
+  files to disk.  <function>syncfs()</function> is advantageous in many cases,
+  but there are some trade-offs to keep in mind.
+ </para>
+
+ <para>
+  Since <function>syncfs()</function> instructs the operating system to
+  synchronize a whole file system, it typically requires many fewer system
+  calls than using <function>fsync()</function> to synchronize each file one by
+  one.  Therefore, using <function>syncfs()</function> may be a lot faster than
+  using <function>fsync()</function>.  However, it may be slower if a file
+  system is shared by other applications that modify a lot of files, since
+  those files will also be written to disk.
+ </para>
+
+ <para>
+  Furthermore, on versions of Linux before 5.8, I/O errors encountered while
+  writing data to disk may not be reported to the calling program, and relevant
+  error messages may appear only in kernel logs.
+ </para>
+
+</appendix>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a027a8aabc..206db91217 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f9dba43b8c..331f65cbe6 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3f4167682a..6d7d10cffb 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -76,6 +76,7 @@
 #include "common/restricted_token.h"
 #include "common/string.h"
 #include "common/username.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
@@ -165,6 +166,7 @@ static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -2466,6 +2468,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3106,6 +3109,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3285,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3332,7 +3340,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3409,7 +3417,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -424,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4e..9fb2c5b3c7 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -77,6 +78,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -435,6 +437,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -493,6 +496,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -623,7 +630,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 5dab1ba9ea..895360dac7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
@@ -1068,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227f..64a7469f22 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 3eea0139c7..13457b2f75 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..c977c990ad 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
@@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync the whole filesystem.
+		 */
+		do_syncfs(dir);
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index dd1532bcb0..09d1e5d561 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -26,10 +26,17 @@ typedef enum PGFileType
 
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
 #endif
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index aea30c0622..d9d5d9da5f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -46,12 +46,6 @@
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 51b7951ad8..46c916ae24 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -544,6 +544,7 @@ DR_printtup
 DR_sqlfunction
 DR_transientrel
 DWORD
+DataDirSyncMethod
 DataDumperPtr
 DataPageDeleteStack
 DatabaseInfo
-- 
2.25.1

#26Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#25)
2 attachment(s)
Re: should frontend tools use syncfs() ?

rebased

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v6-0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From 1bb28cfe5d40670386ae663e14c8854dc1b5486d Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH v6 1/2] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/postmaster/postmaster.c |  1 +
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d7bfb28ff3..54e9bfb8c4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -94,6 +94,7 @@
 #include "access/xlogrecovery.h"
 #include "catalog/pg_control.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/ip.h"
 #include "common/pg_prng.h"
 #include "common/string.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

v6-0002-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 81e87db711bb90f4641b41b4f863f1f5064eb05d Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v6 2/2] allow syncfs in frontend utilities

---
 doc/src/sgml/config.sgml              | 12 +---
 doc/src/sgml/filelist.sgml            |  1 +
 doc/src/sgml/postgres.sgml            |  1 +
 doc/src/sgml/ref/initdb.sgml          | 22 +++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 25 ++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 22 +++++++
 doc/src/sgml/ref/pg_dump.sgml         | 21 +++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 22 +++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 23 +++++++
 doc/src/sgml/syncfs.sgml              | 36 +++++++++++
 src/backend/storage/file/fd.c         |  4 +-
 src/backend/utils/misc/guc_tables.c   |  7 ++-
 src/bin/initdb/initdb.c               | 11 +++-
 src/bin/pg_basebackup/pg_basebackup.c | 12 +++-
 src/bin/pg_checksums/pg_checksums.c   |  9 ++-
 src/bin/pg_dump/pg_backup.h           |  4 +-
 src/bin/pg_dump/pg_backup_archiver.c  | 14 +++--
 src/bin/pg_dump/pg_backup_archiver.h  |  1 +
 src/bin/pg_dump/pg_backup_directory.c |  2 +-
 src/bin/pg_dump/pg_dump.c             | 10 ++-
 src/bin/pg_rewind/file_ops.c          |  2 +-
 src/bin/pg_rewind/pg_rewind.c         |  9 +++
 src/bin/pg_rewind/pg_rewind.h         |  2 +
 src/bin/pg_upgrade/option.c           | 13 ++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/common/file_utils.c               | 91 ++++++++++++++++++++++++++-
 src/fe_utils/option_utils.c           | 24 +++++++
 src/include/common/file_utils.h       | 11 +++-
 src/include/fe_utils/option_utils.h   |  4 ++
 src/include/storage/fd.h              |  6 --
 src/tools/pgindent/typedefs.list      |  1 +
 32 files changed, 389 insertions(+), 40 deletions(-)
 create mode 100644 doc/src/sgml/syncfs.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 694d667bf9..2c7d9f1262 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         On Linux, <literal>syncfs</literal> may be used instead, to ask the
         operating system to synchronize the whole file systems that contain the
         data directory, the WAL files and each tablespace (but not any other
-        file systems that may be reachable through symbolic links).  This may
-        be a lot faster than the <literal>fsync</literal> setting, because it
-        doesn't need to open each file one by one.  On the other hand, it may
-        be slower if a file system is shared by other applications that
-        modify a lot of files, since those files will also be written to disk.
-        Furthermore, on versions of Linux before 5.8, I/O errors encountered
-        while writing data to disk may not be reported to
-        <productname>PostgreSQL</productname>, and relevant error messages may
-        appear only in kernel logs.
+        file systems that may be reachable through symbolic links).  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
        </para>
        <para>
         This parameter can only be set in the
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 63b0fc2a46..df5d7c3025 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -184,6 +184,7 @@
 <!ENTITY acronyms   SYSTEM "acronyms.sgml">
 <!ENTITY glossary   SYSTEM "glossary.sgml">
 <!ENTITY color      SYSTEM "color.sgml">
+<!ENTITY syncfs     SYSTEM "syncfs.sgml">
 
 <!ENTITY features-supported   SYSTEM "features-supported.sgml">
 <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml">
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 2e271862fc..f629524be0 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -294,6 +294,7 @@ break is not needed in a wider output rendering.
   &acronyms;
   &glossary;
   &color;
+  &syncfs;
   &obsolete;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..8a09c5c438 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..d2b8ddd200 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,31 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..7b44ba71cf 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..c1e2220b3c 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,27 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  See <xref linkend="syncfs"/> for more information
+        about using <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..80dff16168 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..6c033485ea 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,29 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        See <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml
new file mode 100644
index 0000000000..00457d2457
--- /dev/null
+++ b/doc/src/sgml/syncfs.sgml
@@ -0,0 +1,36 @@
+<!-- doc/src/sgml/syncfs.sgml -->
+
+<appendix id="syncfs">
+ <title><function>syncfs()</function> Caveats</title>
+
+ <indexterm zone="syncfs">
+  <primary>syncfs</primary>
+ </indexterm>
+
+ <para>
+  On Linux, <function>syncfs()</function> may be specified for some
+  configuration parameters (e.g.,
+  <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g.,
+  <application>pg_upgrade</application>), and client applications (e.g.,
+  <application>pg_basebackup</application>) that involve synchronizing many
+  files to disk.  <function>syncfs()</function> is advantageous in many cases,
+  but there are some trade-offs to keep in mind.
+ </para>
+
+ <para>
+  Since <function>syncfs()</function> instructs the operating system to
+  synchronize a whole file system, it typically requires many fewer system
+  calls than using <function>fsync()</function> to synchronize each file one by
+  one.  Therefore, using <function>syncfs()</function> may be a lot faster than
+  using <function>fsync()</function>.  However, it may be slower if a file
+  system is shared by other applications that modify a lot of files, since
+  those files will also be written to disk.
+ </para>
+
+ <para>
+  Furthermore, on versions of Linux before 5.8, I/O errors encountered while
+  writing data to disk may not be reported to the calling program, and relevant
+  error messages may appear only in kernel logs.
+ </para>
+
+</appendix>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b490a76ba7..3fed475c38 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e565a3092f..5abebe9a9c 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947..543927b8b4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -165,6 +165,7 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -2466,6 +2467,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3106,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3286,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3333,7 +3340,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3396,7 +3403,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -424,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4e..9fb2c5b3c7 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -77,6 +78,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -435,6 +437,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -493,6 +496,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -623,7 +630,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 65f64c282d..dc4a28e81e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
@@ -1068,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f69f02441..bfd44a284e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 7afa96716e..842f3b6cd3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..c977c990ad 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,6 +115,55 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		DIR		   *dir;
+		struct dirent *de;
+
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync whole filesystems.  We only expect
+		 * filesystem boundaries to exist where we tolerate symlinks, namely
+		 * pg_wal and the tablespaces, so we call syncfs() for each of those
+		 * directories.
+		 */
+
+		/* Sync the top level pgdata directory. */
+		do_syncfs(pg_data);
+
+		/* If any tablespaces are configured, sync each of those. */
+		dir = opendir(pg_tblspc);
+		if (dir == NULL)
+			pg_log_error("could not open directory \"%s\": %m", pg_tblspc);
+		else
+		{
+			while (errno = 0, (de = readdir(dir)) != NULL)
+			{
+				char		subpath[MAXPGPATH * 2];
+
+				if (strcmp(de->d_name, ".") == 0 ||
+					strcmp(de->d_name, "..") == 0)
+					continue;
+
+				snprintf(subpath, sizeof(subpath), "%s/%s", pg_tblspc, de->d_name);
+				do_syncfs(subpath);
+			}
+
+			if (errno)
+				pg_log_error("could not read directory \"%s\": %m", pg_tblspc);
+
+			(void) closedir(dir);
+		}
+
+		/* If pg_wal is a symlink, process that too. */
+		if (xlog_is_symlink)
+			do_syncfs(pg_wal);
+
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
@@ -121,8 +196,20 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
+#ifdef HAVE_SYNCFS
+	if (sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
+	{
+		/*
+		 * On Linux, we don't have to open every single file one by one.  We
+		 * can use syncfs() to sync the whole filesystem.
+		 */
+		do_syncfs(dir);
+		return;
+	}
+#endif							/* HAVE_SYNCFS */
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index dd1532bcb0..09d1e5d561 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -26,10 +26,17 @@ typedef enum PGFileType
 
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
 #endif
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index aea30c0622..d9d5d9da5f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -46,12 +46,6 @@
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 49a33c0387..fe571c6265 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -545,6 +545,7 @@ DR_printtup
 DR_sqlfunction
 DR_transientrel
 DWORD
+DataDirSyncMethod
 DataDumperPtr
 DataPageDeleteStack
 DatabaseInfo
-- 
2.25.1

#27Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#26)
Re: should frontend tools use syncfs() ?

On Tue, Aug 29, 2023 at 08:45:59AM -0700, Nathan Bossart wrote:

rebased

0001 looks OK, worth its own, independent, commit.

I understand that I'm perhaps sounding pedantic about fsync_pgdata()..
But, after thinking more about it, I would still make this code fail
hard with an exit(EXIT_FAILURE) to let any C code calling directly
this routine with sync_method = DATA_DIR_SYNC_METHOD_SYNCFS know that
the build does not allow the use of this option when we don't have
HAVE_SYNCFS. parse_sync_method() offers some protection, but adding
this restriction also in the execution path is more friendly than
falling back silently to the default of flushing each file if
fsync_pgdata() is called with syncfs but the build does not support
it. At least that's more predictable.

I'm fine with the doc changes.
--
Michael

#28Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#27)
2 attachment(s)
Re: should frontend tools use syncfs() ?

On Wed, Aug 30, 2023 at 09:10:47AM +0900, Michael Paquier wrote:

I understand that I'm perhaps sounding pedantic about fsync_pgdata()..
But, after thinking more about it, I would still make this code fail
hard with an exit(EXIT_FAILURE) to let any C code calling directly
this routine with sync_method = DATA_DIR_SYNC_METHOD_SYNCFS know that
the build does not allow the use of this option when we don't have
HAVE_SYNCFS. parse_sync_method() offers some protection, but adding
this restriction also in the execution path is more friendly than
falling back silently to the default of flushing each file if
fsync_pgdata() is called with syncfs but the build does not support
it. At least that's more predictable.

That seems fair enough. I did this in v7. I restructured fsync_pgdata()
and fsync_dir_recurse() so that any new sync methods should cause compiler
warnings until they are implemented.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v7-0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From f5ecb3c8b5d397c94a9f8ab9a053b3a0cfd7f854 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH v7 1/2] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/postmaster/postmaster.c |  1 +
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d7bfb28ff3..54e9bfb8c4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -94,6 +94,7 @@
 #include "access/xlogrecovery.h"
 #include "catalog/pg_control.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/ip.h"
 #include "common/pg_prng.h"
 #include "common/string.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

v7-0002-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 77f0843a9a4d795cc82693990bede3ec2cbc3824 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v7 2/2] allow syncfs in frontend utilities

---
 doc/src/sgml/config.sgml              |  12 +-
 doc/src/sgml/filelist.sgml            |   1 +
 doc/src/sgml/postgres.sgml            |   1 +
 doc/src/sgml/ref/initdb.sgml          |  22 ++++
 doc/src/sgml/ref/pg_basebackup.sgml   |  25 ++++
 doc/src/sgml/ref/pg_checksums.sgml    |  22 ++++
 doc/src/sgml/ref/pg_dump.sgml         |  21 ++++
 doc/src/sgml/ref/pg_rewind.sgml       |  22 ++++
 doc/src/sgml/ref/pgupgrade.sgml       |  23 ++++
 doc/src/sgml/syncfs.sgml              |  36 ++++++
 src/backend/storage/file/fd.c         |   4 +-
 src/backend/utils/misc/guc_tables.c   |   7 +-
 src/bin/initdb/initdb.c               |  11 +-
 src/bin/pg_basebackup/pg_basebackup.c |  12 +-
 src/bin/pg_checksums/pg_checksums.c   |   9 +-
 src/bin/pg_dump/pg_backup.h           |   4 +-
 src/bin/pg_dump/pg_backup_archiver.c  |  14 ++-
 src/bin/pg_dump/pg_backup_archiver.h  |   1 +
 src/bin/pg_dump/pg_backup_directory.c |   2 +-
 src/bin/pg_dump/pg_dump.c             |  10 +-
 src/bin/pg_rewind/file_ops.c          |   2 +-
 src/bin/pg_rewind/pg_rewind.c         |   9 ++
 src/bin/pg_rewind/pg_rewind.h         |   2 +
 src/bin/pg_upgrade/option.c           |  13 ++
 src/bin/pg_upgrade/pg_upgrade.c       |   6 +-
 src/bin/pg_upgrade/pg_upgrade.h       |   1 +
 src/common/file_utils.c               | 170 +++++++++++++++++++++-----
 src/fe_utils/option_utils.c           |  24 ++++
 src/include/common/file_utils.h       |  11 +-
 src/include/fe_utils/option_utils.h   |   4 +
 src/include/storage/fd.h              |   6 -
 src/tools/pgindent/typedefs.list      |   1 +
 32 files changed, 441 insertions(+), 67 deletions(-)
 create mode 100644 doc/src/sgml/syncfs.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 694d667bf9..2c7d9f1262 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         On Linux, <literal>syncfs</literal> may be used instead, to ask the
         operating system to synchronize the whole file systems that contain the
         data directory, the WAL files and each tablespace (but not any other
-        file systems that may be reachable through symbolic links).  This may
-        be a lot faster than the <literal>fsync</literal> setting, because it
-        doesn't need to open each file one by one.  On the other hand, it may
-        be slower if a file system is shared by other applications that
-        modify a lot of files, since those files will also be written to disk.
-        Furthermore, on versions of Linux before 5.8, I/O errors encountered
-        while writing data to disk may not be reported to
-        <productname>PostgreSQL</productname>, and relevant error messages may
-        appear only in kernel logs.
+        file systems that may be reachable through symbolic links).  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
        </para>
        <para>
         This parameter can only be set in the
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 63b0fc2a46..df5d7c3025 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -184,6 +184,7 @@
 <!ENTITY acronyms   SYSTEM "acronyms.sgml">
 <!ENTITY glossary   SYSTEM "glossary.sgml">
 <!ENTITY color      SYSTEM "color.sgml">
+<!ENTITY syncfs     SYSTEM "syncfs.sgml">
 
 <!ENTITY features-supported   SYSTEM "features-supported.sgml">
 <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml">
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 2e271862fc..f629524be0 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -294,6 +294,7 @@ break is not needed in a wider output rendering.
   &acronyms;
   &glossary;
   &color;
+  &syncfs;
   &obsolete;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..8a09c5c438 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..d2b8ddd200 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,31 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..7b44ba71cf 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..c1e2220b3c 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,27 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  See <xref linkend="syncfs"/> for more information
+        about using <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..80dff16168 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..6c033485ea 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,29 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        See <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml
new file mode 100644
index 0000000000..00457d2457
--- /dev/null
+++ b/doc/src/sgml/syncfs.sgml
@@ -0,0 +1,36 @@
+<!-- doc/src/sgml/syncfs.sgml -->
+
+<appendix id="syncfs">
+ <title><function>syncfs()</function> Caveats</title>
+
+ <indexterm zone="syncfs">
+  <primary>syncfs</primary>
+ </indexterm>
+
+ <para>
+  On Linux, <function>syncfs()</function> may be specified for some
+  configuration parameters (e.g.,
+  <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g.,
+  <application>pg_upgrade</application>), and client applications (e.g.,
+  <application>pg_basebackup</application>) that involve synchronizing many
+  files to disk.  <function>syncfs()</function> is advantageous in many cases,
+  but there are some trade-offs to keep in mind.
+ </para>
+
+ <para>
+  Since <function>syncfs()</function> instructs the operating system to
+  synchronize a whole file system, it typically requires many fewer system
+  calls than using <function>fsync()</function> to synchronize each file one by
+  one.  Therefore, using <function>syncfs()</function> may be a lot faster than
+  using <function>fsync()</function>.  However, it may be slower if a file
+  system is shared by other applications that modify a lot of files, since
+  those files will also be written to disk.
+ </para>
+
+ <para>
+  Furthermore, on versions of Linux before 5.8, I/O errors encountered while
+  writing data to disk may not be reported to the calling program, and relevant
+  error messages may appear only in kernel logs.
+ </para>
+
+</appendix>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b490a76ba7..3fed475c38 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e565a3092f..5abebe9a9c 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947..543927b8b4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -165,6 +165,7 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -2466,6 +2467,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3106,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3286,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3333,7 +3340,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3396,7 +3403,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -424,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2199,11 +2202,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
@@ -2281,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2452,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4e..9fb2c5b3c7 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -77,6 +78,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -435,6 +437,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -493,6 +496,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -623,7 +630,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 65f64c282d..dc4a28e81e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -431,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -657,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -777,7 +784,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
@@ -1068,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f69f02441..bfd44a284e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -74,6 +75,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -218,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 7afa96716e..842f3b6cd3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..05c73c0bb7 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,30 +115,93 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				DIR		   *dir;
+				struct dirent *de;
+
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync whole filesystems.  We
+				 * only expect filesystem boundaries to exist where we
+				 * tolerate symlinks, namely pg_wal and the tablespaces, so we
+				 * call syncfs() for each of those directories.
+				 */
+
+				/* Sync the top level pgdata directory. */
+				do_syncfs(pg_data);
+
+				/* If any tablespaces are configured, sync each of those. */
+				dir = opendir(pg_tblspc);
+				if (dir == NULL)
+					pg_log_error("could not open directory \"%s\": %m",
+								 pg_tblspc);
+				else
+				{
+					while (errno = 0, (de = readdir(dir)) != NULL)
+					{
+						char		subpath[MAXPGPATH * 2];
+
+						if (strcmp(de->d_name, ".") == 0 ||
+							strcmp(de->d_name, "..") == 0)
+							continue;
+
+						snprintf(subpath, sizeof(subpath), "%s/%s",
+								 pg_tblspc, de->d_name);
+						do_syncfs(subpath);
+					}
+
+					if (errno)
+						pg_log_error("could not read directory \"%s\": %m",
+									 pg_tblspc);
+
+					(void) closedir(dir);
+				}
+
+				/* If pg_wal is a symlink, process that too. */
+				if (xlog_is_symlink)
+					do_syncfs(pg_wal);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(pg_data, pre_sync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, pre_sync_fname, false);
-	walkdir(pg_tblspc, pre_sync_fname, true);
+				walkdir(pg_data, pre_sync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, pre_sync_fname, false);
+				walkdir(pg_tblspc, pre_sync_fname, true);
 #endif
 
-	/*
-	 * Now we do the fsync()s in the same order.
-	 *
-	 * The main call ignores symlinks, so in addition to specially processing
-	 * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
-	 * process_symlinks = true.  Note that if there are any plain directories
-	 * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
-	 * so we don't worry about optimizing it.
-	 */
-	walkdir(pg_data, fsync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, fsync_fname, false);
-	walkdir(pg_tblspc, fsync_fname, true);
+				/*
+				 * Now we do the fsync()s in the same order.
+				 *
+				 * The main call ignores symlinks, so in addition to specially
+				 * processing pg_wal if it's a symlink, pg_tblspc has to be
+				 * visited separately with process_symlinks = true.  Note that
+				 * if there are any plain directories in pg_tblspc, they'll
+				 * get fsync'd twice. That's not an expected case so we don't
+				 * worry about optimizing it.
+				 */
+				walkdir(pg_data, fsync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, fsync_fname, false);
+				walkdir(pg_tblspc, fsync_fname, true);
+			}
+			break;
+	}
 }
 
 /*
@@ -121,17 +210,40 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync the whole filesystem.
+				 */
+				do_syncfs(dir);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(dir, pre_sync_fname, false);
+				walkdir(dir, pre_sync_fname, false);
 #endif
 
-	walkdir(dir, fsync_fname, false);
+				walkdir(dir, fsync_fname, false);
+			}
+			break;
+	}
 }
 
 /*
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index dd1532bcb0..09d1e5d561 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -26,10 +26,17 @@ typedef enum PGFileType
 
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
 #endif
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index aea30c0622..d9d5d9da5f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -46,12 +46,6 @@
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 49a33c0387..fe571c6265 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -545,6 +545,7 @@ DR_printtup
 DR_sqlfunction
 DR_transientrel
 DWORD
+DataDirSyncMethod
 DataDumperPtr
 DataPageDeleteStack
 DatabaseInfo
-- 
2.25.1

#29Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#28)
Re: should frontend tools use syncfs() ?

On Tue, Aug 29, 2023 at 06:14:08PM -0700, Nathan Bossart wrote:

That seems fair enough. I did this in v7. I restructured fsync_pgdata()
and fsync_dir_recurse() so that any new sync methods should cause compiler
warnings until they are implemented.

That's pretty cool and easier to maintain in the long term.

After sleeping on it, there are two things that popped up in my mind
that may be worth considering:
- Should we have some regression tests? We should only need one test
in one of the binaries to be able to stress the new code paths of
file_utils.c with syncfs. The cheapest one may be pg_dump with a
dump in directory format? Note that we have tests there that depend
on lz4 or gzip existing, which are conditional.
- Perhaps 0002 should be split into two parts? The first patch could
introduce DataDirSyncMethod in file_utils.h with the new routines in
file_utils.c (including syncfs support), and the second patch would
plug the new option into all the binaries. In the first patch, I would
hardcode DATA_DIR_SYNC_METHOD_FSYNC.
--
Michael

#30Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#29)
4 attachment(s)
Re: should frontend tools use syncfs() ?

On Thu, Aug 31, 2023 at 02:30:33PM +0900, Michael Paquier wrote:

- Should we have some regression tests? We should only need one test
in one of the binaries to be able to stress the new code paths of
file_utils.c with syncfs. The cheapest one may be pg_dump with a
dump in directory format? Note that we have tests there that depend
on lz4 or gzip existing, which are conditional.

I added one for initdb in v8.

- Perhaps 0002 should be split into two parts? The first patch could
introduce DataDirSyncMethod in file_utils.h with the new routines in
file_utils.c (including syncfs support), and the second patch would
plug the new option into all the binaries. In the first patch, I would
hardcode DATA_DIR_SYNC_METHOD_FSYNC.

Ha, I was just thinking about this, too. I actually split it into 3
patches. The first adds DataDirSyncMethod and uses it for
recovery_init_sync_method. The second adds syncfs() support in
file_utils.c. And the third adds the ability to specify syncfs in the
frontend utilities. WDYT?

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v8-0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From e627b8ea4dcc01ac9e2fe3a21e3b3cc109b815e4 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH v8 1/4] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/postmaster/postmaster.c |  1 +
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d7bfb28ff3..54e9bfb8c4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -94,6 +94,7 @@
 #include "access/xlogrecovery.h"
 #include "catalog/pg_control.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/ip.h"
 #include "common/pg_prng.h"
 #include "common/string.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

v8-0002-make-common-enum-for-sync-methods.patch (text/x-diff; charset=us-ascii) Download
From 9145faf587249cbb1d3ab54e296542cf7c71eefc Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Thu, 31 Aug 2023 07:38:42 -0700
Subject: [PATCH v8 2/4] make common enum for sync methods

---
 src/backend/storage/file/fd.c       | 4 ++--
 src/backend/utils/misc/guc_tables.c | 7 ++++---
 src/include/common/file_utils.h     | 6 ++++++
 src/include/storage/fd.h            | 6 ------
 src/tools/pgindent/typedefs.list    | 1 +
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b490a76ba7..3fed475c38 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e565a3092f..5abebe9a9c 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index dd1532bcb0..7da21f15e6 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -24,6 +24,12 @@ typedef enum PGFileType
 	PGFILETYPE_LNK
 } PGFileType;
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index aea30c0622..d9d5d9da5f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -46,12 +46,6 @@
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 49a33c0387..fe571c6265 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -545,6 +545,7 @@ DR_printtup
 DR_sqlfunction
 DR_transientrel
 DWORD
+DataDirSyncMethod
 DataDumperPtr
 DataPageDeleteStack
 DatabaseInfo
-- 
2.25.1

v8-0003-add-support-for-syncfs-in-frontend-support-functi.patch (text/x-diff; charset=us-ascii) Download
From c5bc205b3d7d645356ecd27924fdd164ffa90ce3 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Thu, 31 Aug 2023 07:59:52 -0700
Subject: [PATCH v8 3/4] add support for syncfs in frontend support functions

---
 src/bin/initdb/initdb.c               |   5 +-
 src/bin/pg_basebackup/pg_basebackup.c |   5 +-
 src/bin/pg_checksums/pg_checksums.c   |   3 +-
 src/bin/pg_dump/pg_backup.h           |   4 +-
 src/bin/pg_dump/pg_backup_archiver.c  |  14 ++-
 src/bin/pg_dump/pg_backup_archiver.h  |   1 +
 src/bin/pg_dump/pg_backup_directory.c |   2 +-
 src/bin/pg_dump/pg_dump.c             |   3 +-
 src/bin/pg_rewind/file_ops.c          |   2 +-
 src/bin/pg_rewind/pg_rewind.c         |   1 +
 src/bin/pg_rewind/pg_rewind.h         |   2 +
 src/common/file_utils.c               | 170 +++++++++++++++++++++-----
 src/include/common/file_utils.h       |   5 +-
 13 files changed, 172 insertions(+), 45 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947..bbea1e412b 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -165,6 +165,7 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -3333,7 +3334,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3396,7 +3397,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..1a6eacf6d5 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4e..123450f483 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -623,7 +624,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 65f64c282d..39a468b131 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -777,7 +778,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f69f02441..bdfacf3263 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -74,6 +74,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..05c73c0bb7 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,30 +115,93 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				DIR		   *dir;
+				struct dirent *de;
+
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync whole filesystems.  We
+				 * only expect filesystem boundaries to exist where we
+				 * tolerate symlinks, namely pg_wal and the tablespaces, so we
+				 * call syncfs() for each of those directories.
+				 */
+
+				/* Sync the top level pgdata directory. */
+				do_syncfs(pg_data);
+
+				/* If any tablespaces are configured, sync each of those. */
+				dir = opendir(pg_tblspc);
+				if (dir == NULL)
+					pg_log_error("could not open directory \"%s\": %m",
+								 pg_tblspc);
+				else
+				{
+					while (errno = 0, (de = readdir(dir)) != NULL)
+					{
+						char		subpath[MAXPGPATH * 2];
+
+						if (strcmp(de->d_name, ".") == 0 ||
+							strcmp(de->d_name, "..") == 0)
+							continue;
+
+						snprintf(subpath, sizeof(subpath), "%s/%s",
+								 pg_tblspc, de->d_name);
+						do_syncfs(subpath);
+					}
+
+					if (errno)
+						pg_log_error("could not read directory \"%s\": %m",
+									 pg_tblspc);
+
+					(void) closedir(dir);
+				}
+
+				/* If pg_wal is a symlink, process that too. */
+				if (xlog_is_symlink)
+					do_syncfs(pg_wal);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(pg_data, pre_sync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, pre_sync_fname, false);
-	walkdir(pg_tblspc, pre_sync_fname, true);
+				walkdir(pg_data, pre_sync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, pre_sync_fname, false);
+				walkdir(pg_tblspc, pre_sync_fname, true);
 #endif
 
-	/*
-	 * Now we do the fsync()s in the same order.
-	 *
-	 * The main call ignores symlinks, so in addition to specially processing
-	 * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
-	 * process_symlinks = true.  Note that if there are any plain directories
-	 * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
-	 * so we don't worry about optimizing it.
-	 */
-	walkdir(pg_data, fsync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, fsync_fname, false);
-	walkdir(pg_tblspc, fsync_fname, true);
+				/*
+				 * Now we do the fsync()s in the same order.
+				 *
+				 * The main call ignores symlinks, so in addition to specially
+				 * processing pg_wal if it's a symlink, pg_tblspc has to be
+				 * visited separately with process_symlinks = true.  Note that
+				 * if there are any plain directories in pg_tblspc, they'll
+				 * get fsync'd twice. That's not an expected case so we don't
+				 * worry about optimizing it.
+				 */
+				walkdir(pg_data, fsync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, fsync_fname, false);
+				walkdir(pg_tblspc, fsync_fname, true);
+			}
+			break;
+	}
 }
 
 /*
@@ -121,17 +210,40 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync the whole filesystem.
+				 */
+				do_syncfs(dir);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(dir, pre_sync_fname, false);
+				walkdir(dir, pre_sync_fname, false);
 #endif
 
-	walkdir(dir, fsync_fname, false);
+				walkdir(dir, fsync_fname, false);
+			}
+			break;
+	}
 }
 
 /*
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 7da21f15e6..cae6159bf6 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -34,8 +34,9 @@ struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
 #endif
-- 
2.25.1

v8-0004-allow-syncfs-in-frontend-utilities.patch (text/x-diff; charset=us-ascii) Download
From 49a47f48b247acb33cb1538108b641b4cb8131ee Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v8 4/4] allow syncfs in frontend utilities

---
 doc/src/sgml/config.sgml              | 12 +++------
 doc/src/sgml/filelist.sgml            |  1 +
 doc/src/sgml/postgres.sgml            |  1 +
 doc/src/sgml/ref/initdb.sgml          | 22 ++++++++++++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 25 +++++++++++++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 22 ++++++++++++++++
 doc/src/sgml/ref/pg_dump.sgml         | 21 ++++++++++++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 22 ++++++++++++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 23 +++++++++++++++++
 doc/src/sgml/syncfs.sgml              | 36 +++++++++++++++++++++++++++
 src/bin/initdb/initdb.c               |  6 +++++
 src/bin/initdb/t/001_initdb.pl        | 14 +++++++++++
 src/bin/pg_basebackup/pg_basebackup.c |  7 ++++++
 src/bin/pg_checksums/pg_checksums.c   |  6 +++++
 src/bin/pg_dump/pg_dump.c             |  7 ++++++
 src/bin/pg_rewind/pg_rewind.c         |  8 ++++++
 src/bin/pg_upgrade/option.c           | 13 ++++++++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +++--
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/fe_utils/option_utils.c           | 24 ++++++++++++++++++
 src/include/fe_utils/option_utils.h   |  4 +++
 21 files changed, 270 insertions(+), 11 deletions(-)
 create mode 100644 doc/src/sgml/syncfs.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 694d667bf9..2c7d9f1262 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         On Linux, <literal>syncfs</literal> may be used instead, to ask the
         operating system to synchronize the whole file systems that contain the
         data directory, the WAL files and each tablespace (but not any other
-        file systems that may be reachable through symbolic links).  This may
-        be a lot faster than the <literal>fsync</literal> setting, because it
-        doesn't need to open each file one by one.  On the other hand, it may
-        be slower if a file system is shared by other applications that
-        modify a lot of files, since those files will also be written to disk.
-        Furthermore, on versions of Linux before 5.8, I/O errors encountered
-        while writing data to disk may not be reported to
-        <productname>PostgreSQL</productname>, and relevant error messages may
-        appear only in kernel logs.
+        file systems that may be reachable through symbolic links).  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
        </para>
        <para>
         This parameter can only be set in the
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 63b0fc2a46..df5d7c3025 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -184,6 +184,7 @@
 <!ENTITY acronyms   SYSTEM "acronyms.sgml">
 <!ENTITY glossary   SYSTEM "glossary.sgml">
 <!ENTITY color      SYSTEM "color.sgml">
+<!ENTITY syncfs     SYSTEM "syncfs.sgml">
 
 <!ENTITY features-supported   SYSTEM "features-supported.sgml">
 <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml">
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 2e271862fc..f629524be0 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -294,6 +294,7 @@ break is not needed in a wider output rendering.
   &acronyms;
   &glossary;
   &color;
+  &syncfs;
   &obsolete;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..8a09c5c438 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..d2b8ddd200 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,31 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..7b44ba71cf 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..c1e2220b3c 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,27 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  See <xref linkend="syncfs"/> for more information
+        about using <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..80dff16168 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..6c033485ea 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,29 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        See <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml
new file mode 100644
index 0000000000..00457d2457
--- /dev/null
+++ b/doc/src/sgml/syncfs.sgml
@@ -0,0 +1,36 @@
+<!-- doc/src/sgml/syncfs.sgml -->
+
+<appendix id="syncfs">
+ <title><function>syncfs()</function> Caveats</title>
+
+ <indexterm zone="syncfs">
+  <primary>syncfs</primary>
+ </indexterm>
+
+ <para>
+  On Linux, <function>syncfs()</function> may be specified for some
+  configuration parameters (e.g.,
+  <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g.,
+  <application>pg_upgrade</application>), and client applications (e.g.,
+  <application>pg_basebackup</application>) that involve synchronizing many
+  files to disk.  <function>syncfs()</function> is advantageous in many cases,
+  but there are some trade-offs to keep in mind.
+ </para>
+
+ <para>
+  Since <function>syncfs()</function> instructs the operating system to
+  synchronize a whole file system, it typically requires many fewer system
+  calls than using <function>fsync()</function> to synchronize each file one by
+  one.  Therefore, using <function>syncfs()</function> may be a lot faster than
+  using <function>fsync()</function>.  However, it may be slower if a file
+  system is shared by other applications that modify a lot of files, since
+  those files will also be written to disk.
+ </para>
+
+ <para>
+  Furthermore, on versions of Linux before 5.8, I/O errors encountered while
+  writing data to disk may not be reported to the calling program, and relevant
+  error messages may appear only in kernel logs.
+ </para>
+
+</appendix>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index bbea1e412b..543927b8b4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2467,6 +2467,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3107,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3287,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 2d7469d2fc..2f2c3faba0 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -16,6 +16,7 @@ use Test::More;
 my $tempdir = PostgreSQL::Test::Utils::tempdir;
 my $xlogdir = "$tempdir/pgxlog";
 my $datadir = "$tempdir/data";
+my $supports_syncfs = check_pg_config("#define HAVE_SYNCFS 1");
 
 program_help_ok('initdb');
 program_version_ok('initdb');
@@ -82,6 +83,19 @@ command_fails([ 'pg_checksums', '-D', $datadir ],
 command_ok([ 'initdb', '-S', $datadir ], 'sync only');
 command_fails([ 'initdb', $datadir ], 'existing data directory');
 
+command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'fsync' ],
+	'sync method fsync');
+if ($supports_syncfs)
+{
+	command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ],
+		'sync method syncfs');
+}
+else
+{
+	command_fails([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ],
+		'sync method syncfs');
+}
+
 # Check group access on PGDATA
 SKIP:
 {
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1a6eacf6d5..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -425,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2282,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2453,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 123450f483..9fb2c5b3c7 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -78,6 +78,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -436,6 +437,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -494,6 +496,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 39a468b131..dc4a28e81e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -432,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -658,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -1069,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index bdfacf3263..bfd44a284e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -108,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -132,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -219,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 7afa96716e..842f3b6cd3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
-- 
2.25.1

#31Michael Paquier
michael@paquier.xyz
In reply to: Nathan Bossart (#30)
Re: should frontend tools use syncfs() ?

On Thu, Aug 31, 2023 at 08:48:58AM -0700, Nathan Bossart wrote:

> On Thu, Aug 31, 2023 at 02:30:33PM +0900, Michael Paquier wrote:

>> - Should we have some regression tests? We should only need one test
>> in one of the binaries to be able to stress the new code paths of
>> file_utils.c with syncfs. The cheapest one may be pg_dump with a
>> dump in directory format? Note that we have tests there that depend
>> on lz4 or gzip existing, which are conditional.

> I added one for initdb in v8.

+my $supports_syncfs = check_pg_config("#define HAVE_SYNCFS 1");

That should be OK this way. The extra running time is not really
visible, right?

+command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'fsync' ],
+   'sync method fsync');

Removing this one may be fine, actually, because we test the sync
paths on other places like pg_dump.

> Ha, I was just thinking about this, too. I actually split it into 3
> patches. The first adds DataDirSyncMethod and uses it for
> recovery_init_sync_method. The second adds syncfs() support in
> file_utils.c. And the third adds the ability to specify syncfs in the
> frontend utilities. WDYT?

This split is OK by me, so WFM.
--
Michael

#32Nathan Bossart
nathandbossart@gmail.com
In reply to: Michael Paquier (#31)
4 attachment(s)
Re: should frontend tools use syncfs() ?

On Fri, Sep 01, 2023 at 10:40:12AM +0900, Michael Paquier wrote:

> That should be OK this way. The extra running time is not really
> visible, right?

AFAICT it is negligible. Presumably it could take a little longer if there
is a lot to sync on the file system, but I don't know if that's worth
worrying about.

> +command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'fsync' ],
> +   'sync method fsync');
>
> Removing this one may be fine, actually, because we test the sync
> paths on other places like pg_dump.

Done.

> This split is OK by me, so WFM.

Cool.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v9-0001-move-PG_TEMP_FILE-macros-to-file_utils.h.patchtext/x-diff; charset=us-asciiDownload
From bc2210a3cee0a852b65f74b544a0c0d23c59b78f Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 21 Aug 2023 19:05:14 -0700
Subject: [PATCH v9 1/4] move PG_TEMP_FILE* macros to file_utils.h

---
 src/backend/backup/basebackup.c     |  2 +-
 src/backend/postmaster/postmaster.c |  1 +
 src/backend/storage/file/fileset.c  |  1 +
 src/bin/pg_checksums/pg_checksums.c | 10 ----------
 src/bin/pg_rewind/filemap.c         |  2 +-
 src/include/common/file_utils.h     |  4 ++++
 src/include/storage/fd.h            |  4 ----
 7 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 45be21131c..5d66014499 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -25,6 +25,7 @@
 #include "commands/defrem.h"
 #include "common/compression.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "lib/stringinfo.h"
 #include "miscadmin.h"
 #include "nodes/pg_list.h"
@@ -37,7 +38,6 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/dsm_impl.h"
-#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
 #include "utils/builtins.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d7bfb28ff3..54e9bfb8c4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -94,6 +94,7 @@
 #include "access/xlogrecovery.h"
 #include "catalog/pg_control.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "common/ip.h"
 #include "common/pg_prng.h"
 #include "common/string.h"
diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c
index e9951b0269..84cae32548 100644
--- a/src/backend/storage/file/fileset.c
+++ b/src/backend/storage/file/fileset.c
@@ -25,6 +25,7 @@
 
 #include "catalog/pg_tablespace.h"
 #include "commands/tablespace.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "storage/ipc.h"
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 19eb67e485..9011a19b4e 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -52,16 +52,6 @@ typedef enum
 	PG_MODE_ENABLE
 } PgChecksumMode;
 
-/*
- * Filename components.
- *
- * XXX: fd.h is not declared here as frontend side code is not able to
- * interact with the backend-side definitions for the various fsync
- * wrappers.
- */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 static PgChecksumMode mode = PG_MODE_CHECK;
 
 static const char *progname;
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index bd5c598e20..58280d9abc 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -27,12 +27,12 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/file_utils.h"
 #include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
-#include "storage/fd.h"
 
 /*
  * Define a hash table which we can use to store information about the files
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index b7efa1226d..dd1532bcb0 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,4 +46,8 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 
 extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
+/* Filename components */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
 #endif							/* FILE_UTILS_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..aea30c0622 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -195,8 +195,4 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
-/* Filename components */
-#define PG_TEMP_FILES_DIR "pgsql_tmp"
-#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-
 #endif							/* FD_H */
-- 
2.25.1

v9-0002-make-common-enum-for-sync-methods.patchtext/x-diff; charset=us-asciiDownload
From ae51ea0182e88398a5c8ebd644fea9e98229e1d0 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Thu, 31 Aug 2023 07:38:42 -0700
Subject: [PATCH v9 2/4] make common enum for sync methods

---
 src/backend/storage/file/fd.c       | 4 ++--
 src/backend/utils/misc/guc_tables.c | 7 ++++---
 src/include/common/file_utils.h     | 6 ++++++
 src/include/storage/fd.h            | 6 ------
 src/tools/pgindent/typedefs.list    | 1 +
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b490a76ba7..3fed475c38 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -162,7 +162,7 @@ int			max_safe_fds = FD_MINFREE;	/* default if not changed */
 bool		data_sync_retry = false;
 
 /* How SyncDataDirectory() should do its job. */
-int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
+int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
@@ -3513,7 +3513,7 @@ SyncDataDirectory(void)
 	}
 
 #ifdef HAVE_SYNCFS
-	if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS)
+	if (recovery_init_sync_method == DATA_DIR_SYNC_METHOD_SYNCFS)
 	{
 		DIR		   *dir;
 		struct dirent *de;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e565a3092f..5abebe9a9c 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -41,6 +41,7 @@
 #include "commands/trigger.h"
 #include "commands/user.h"
 #include "commands/vacuum.h"
+#include "common/file_utils.h"
 #include "common/scram-common.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
@@ -430,9 +431,9 @@ StaticAssertDecl(lengthof(ssl_protocol_versions_info) == (PG_TLS1_3_VERSION + 2)
 				 "array length mismatch");
 
 static const struct config_enum_entry recovery_init_sync_method_options[] = {
-	{"fsync", RECOVERY_INIT_SYNC_METHOD_FSYNC, false},
+	{"fsync", DATA_DIR_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_SYNCFS
-	{"syncfs", RECOVERY_INIT_SYNC_METHOD_SYNCFS, false},
+	{"syncfs", DATA_DIR_SYNC_METHOD_SYNCFS, false},
 #endif
 	{NULL, 0, false}
 };
@@ -4964,7 +4965,7 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Sets the method for synchronizing the data directory before crash recovery."),
 		},
 		&recovery_init_sync_method,
-		RECOVERY_INIT_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
+		DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index dd1532bcb0..7da21f15e6 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -24,6 +24,12 @@ typedef enum PGFileType
 	PGFILETYPE_LNK
 } PGFileType;
 
+typedef enum DataDirSyncMethod
+{
+	DATA_DIR_SYNC_METHOD_FSYNC,
+	DATA_DIR_SYNC_METHOD_SYNCFS
+} DataDirSyncMethod;
+
 struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index aea30c0622..d9d5d9da5f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -46,12 +46,6 @@
 #include <dirent.h>
 #include <fcntl.h>
 
-typedef enum RecoveryInitSyncMethod
-{
-	RECOVERY_INIT_SYNC_METHOD_FSYNC,
-	RECOVERY_INIT_SYNC_METHOD_SYNCFS
-}			RecoveryInitSyncMethod;
-
 typedef int File;
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 49a33c0387..fe571c6265 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -545,6 +545,7 @@ DR_printtup
 DR_sqlfunction
 DR_transientrel
 DWORD
+DataDirSyncMethod
 DataDumperPtr
 DataPageDeleteStack
 DatabaseInfo
-- 
2.25.1

v9-0003-add-support-for-syncfs-in-frontend-support-functi.patchtext/x-diff; charset=us-asciiDownload
From 0176762890bc8ddc02e26696ba4414c6c8bac2e5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Thu, 31 Aug 2023 07:59:52 -0700
Subject: [PATCH v9 3/4] add support for syncfs in frontend support functions

---
 src/bin/initdb/initdb.c               |   5 +-
 src/bin/pg_basebackup/pg_basebackup.c |   5 +-
 src/bin/pg_checksums/pg_checksums.c   |   3 +-
 src/bin/pg_dump/pg_backup.h           |   4 +-
 src/bin/pg_dump/pg_backup_archiver.c  |  14 ++-
 src/bin/pg_dump/pg_backup_archiver.h  |   1 +
 src/bin/pg_dump/pg_backup_directory.c |   2 +-
 src/bin/pg_dump/pg_dump.c             |   3 +-
 src/bin/pg_rewind/file_ops.c          |   2 +-
 src/bin/pg_rewind/pg_rewind.c         |   1 +
 src/bin/pg_rewind/pg_rewind.h         |   2 +
 src/common/file_utils.c               | 170 +++++++++++++++++++++-----
 src/include/common/file_utils.h       |   5 +-
 13 files changed, 172 insertions(+), 45 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947..bbea1e412b 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -165,6 +165,7 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
 /* internal vars */
@@ -3333,7 +3334,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 		return 0;
 	}
@@ -3396,7 +3397,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		fsync_pgdata(pg_data, PG_VERSION_NUM);
+		fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
 		check_ok();
 	}
 	else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb..1a6eacf6d5 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
 static bool manifest = true;
 static bool manifest_force_encode = false;
 static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		if (format == 't')
 		{
 			if (strcmp(basedir, "-") != 0)
-				(void) fsync_dir_recurse(basedir);
+				(void) fsync_dir_recurse(basedir, sync_method);
 		}
 		else
 		{
-			(void) fsync_pgdata(basedir, serverVersion);
+			(void) fsync_pgdata(basedir, serverVersion, sync_method);
 		}
 	}
 
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4e..123450f483 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
 static bool do_sync = true;
 static bool verbose = false;
 static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 typedef enum
 {
@@ -623,7 +624,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			fsync_pgdata(DataDir, PG_VERSION_NUM);
+			fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b..3a57cdd97d 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
 #define PG_BACKUP_H
 
 #include "common/compression.h"
+#include "common/file_utils.h"
 #include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
 
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
 extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 							  const pg_compress_specification compression_spec,
 							  bool dosync, ArchiveMode mode,
-							  SetupWorkerPtrType setupDumpWorker);
+							  SetupWorkerPtrType setupDumpWorker,
+							  DataDirSyncMethod sync_method);
 
 /* The --list option */
 extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec32..4d83381d84 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
 							   const pg_compress_specification compression_spec,
 							   bool dosync, ArchiveMode mode,
-							   SetupWorkerPtrType setupWorkerPtr);
+							   SetupWorkerPtrType setupWorkerPtr,
+							   DataDirSyncMethod sync_method);
 static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
 static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
 			  const pg_compress_specification compression_spec,
 			  bool dosync, ArchiveMode mode,
-			  SetupWorkerPtrType setupDumpWorker)
+			  SetupWorkerPtrType setupDumpWorker,
+			  DataDirSyncMethod sync_method)
 
 {
 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
-								 dosync, mode, setupDumpWorker);
+								 dosync, mode, setupDumpWorker, sync_method);
 
 	return (Archive *) AH;
 }
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
 
 	compression_spec.algorithm = PG_COMPRESSION_NONE;
 	AH = _allocAH(FileSpec, fmt, compression_spec, true,
-				  archModeRead, setupRestoreWorker);
+				  archModeRead, setupRestoreWorker,
+				  DATA_DIR_SYNC_METHOD_FSYNC);
 
 	return (Archive *) AH;
 }
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
 _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 		 const pg_compress_specification compression_spec,
 		 bool dosync, ArchiveMode mode,
-		 SetupWorkerPtrType setupWorkerPtr)
+		 SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
 {
 	ArchiveHandle *AH;
 	CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 	AH->mode = mode;
 	AH->compression_spec = compression_spec;
 	AH->dosync = dosync;
+	AH->sync_method = sync_method;
 
 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..b07673933d 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
 	pg_compress_specification compression_spec; /* Requested specification for
 												 * compression */
 	bool		dosync;			/* data requested to be synced on sight */
+	DataDirSyncMethod sync_method;
 	ArchiveMode mode;			/* File mode - r or w */
 	void	   *formatData;		/* Header data specific to file format */
 
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd..6faa3a511f 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
 		 * individually. Just recurse once through all the files generated.
 		 */
 		if (AH->dosync)
-			fsync_dir_recurse(ctx->directory);
+			fsync_dir_recurse(ctx->directory, AH->sync_method);
 	}
 	AH->FH = NULL;
 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 65f64c282d..39a468b131 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
 	char	   *compression_algorithm_str = "none";
 	char	   *error_detail = NULL;
 	bool		user_compression_defined = false;
+	DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 	static DumpOptions dopt;
 
@@ -777,7 +778,7 @@ main(int argc, char **argv)
 
 	/* Open the output file */
 	fout = CreateArchive(filename, archiveFormat, compression_spec,
-						 dosync, archiveMode, setupDumpWorker);
+						 dosync, archiveMode, setupDumpWorker, sync_method);
 
 	/* Make dump options accessible right away */
 	SetArchiveOptions(fout, &dopt, NULL);
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da4..451fb1856e 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	fsync_pgdata(datadir_target, PG_VERSION_NUM);
+	fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
 }
 
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f69f02441..bdfacf3263 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -74,6 +74,7 @@ bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb..05729adfef 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
 
 #include "access/timeline.h"
 #include "common/logging.h"
+#include "common/file_utils.h"
 #include "datapagemap.h"
 #include "libpq-fe.h"
 #include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
 extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
+extern DataDirSyncMethod sync_method;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acb..05c73c0bb7 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,6 +51,31 @@ static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
 					bool process_symlinks);
 
+#ifdef HAVE_SYNCFS
+static void
+do_syncfs(const char *path)
+{
+	int			fd;
+
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+	{
+		pg_log_error("could not open file \"%s\": %m", path);
+		return;
+	}
+
+	if (syncfs(fd) < 0)
+	{
+		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+		(void) close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	(void) close(fd);
+}
+#endif
+
 /*
  * Issue fsync recursively on PGDATA and all its contents.
  *
@@ -63,7 +88,8 @@ static void walkdir(const char *path,
  */
 void
 fsync_pgdata(const char *pg_data,
-			 int serverVersion)
+			 int serverVersion,
+			 DataDirSyncMethod sync_method)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -89,30 +115,93 @@ fsync_pgdata(const char *pg_data,
 			xlog_is_symlink = true;
 	}
 
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				DIR		   *dir;
+				struct dirent *de;
+
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync whole filesystems.  We
+				 * only expect filesystem boundaries to exist where we
+				 * tolerate symlinks, namely pg_wal and the tablespaces, so we
+				 * call syncfs() for each of those directories.
+				 */
+
+				/* Sync the top level pgdata directory. */
+				do_syncfs(pg_data);
+
+				/* If any tablespaces are configured, sync each of those. */
+				dir = opendir(pg_tblspc);
+				if (dir == NULL)
+					pg_log_error("could not open directory \"%s\": %m",
+								 pg_tblspc);
+				else
+				{
+					while (errno = 0, (de = readdir(dir)) != NULL)
+					{
+						char		subpath[MAXPGPATH * 2];
+
+						if (strcmp(de->d_name, ".") == 0 ||
+							strcmp(de->d_name, "..") == 0)
+							continue;
+
+						snprintf(subpath, sizeof(subpath), "%s/%s",
+								 pg_tblspc, de->d_name);
+						do_syncfs(subpath);
+					}
+
+					if (errno)
+						pg_log_error("could not read directory \"%s\": %m",
+									 pg_tblspc);
+
+					(void) closedir(dir);
+				}
+
+				/* If pg_wal is a symlink, process that too. */
+				if (xlog_is_symlink)
+					do_syncfs(pg_wal);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(pg_data, pre_sync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, pre_sync_fname, false);
-	walkdir(pg_tblspc, pre_sync_fname, true);
+				walkdir(pg_data, pre_sync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, pre_sync_fname, false);
+				walkdir(pg_tblspc, pre_sync_fname, true);
 #endif
 
-	/*
-	 * Now we do the fsync()s in the same order.
-	 *
-	 * The main call ignores symlinks, so in addition to specially processing
-	 * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
-	 * process_symlinks = true.  Note that if there are any plain directories
-	 * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
-	 * so we don't worry about optimizing it.
-	 */
-	walkdir(pg_data, fsync_fname, false);
-	if (xlog_is_symlink)
-		walkdir(pg_wal, fsync_fname, false);
-	walkdir(pg_tblspc, fsync_fname, true);
+				/*
+				 * Now we do the fsync()s in the same order.
+				 *
+				 * The main call ignores symlinks, so in addition to specially
+				 * processing pg_wal if it's a symlink, pg_tblspc has to be
+				 * visited separately with process_symlinks = true.  Note that
+				 * if there are any plain directories in pg_tblspc, they'll
+				 * get fsync'd twice. That's not an expected case so we don't
+				 * worry about optimizing it.
+				 */
+				walkdir(pg_data, fsync_fname, false);
+				if (xlog_is_symlink)
+					walkdir(pg_wal, fsync_fname, false);
+				walkdir(pg_tblspc, fsync_fname, true);
+			}
+			break;
+	}
 }
 
 /*
@@ -121,17 +210,40 @@ fsync_pgdata(const char *pg_data,
  * This is a convenient wrapper on top of walkdir().
  */
 void
-fsync_dir_recurse(const char *dir)
+fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 {
-	/*
-	 * If possible, hint to the kernel that we're soon going to fsync the data
-	 * directory and its contents.
-	 */
+	switch (sync_method)
+	{
+		case DATA_DIR_SYNC_METHOD_SYNCFS:
+			{
+#ifndef HAVE_SYNCFS
+				pg_log_error("this build does not support sync method \"%s\"",
+							 "syncfs");
+				exit(EXIT_FAILURE);
+#else
+				/*
+				 * On Linux, we don't have to open every single file one by
+				 * one.  We can use syncfs() to sync the whole filesystem.
+				 */
+				do_syncfs(dir);
+#endif							/* HAVE_SYNCFS */
+			}
+			break;
+
+		case DATA_DIR_SYNC_METHOD_FSYNC:
+			{
+				/*
+				 * If possible, hint to the kernel that we're soon going to
+				 * fsync the data directory and its contents.
+				 */
 #ifdef PG_FLUSH_DATA_WORKS
-	walkdir(dir, pre_sync_fname, false);
+				walkdir(dir, pre_sync_fname, false);
 #endif
 
-	walkdir(dir, fsync_fname, false);
+				walkdir(dir, fsync_fname, false);
+			}
+			break;
+	}
 }
 
 /*
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 7da21f15e6..cae6159bf6 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -34,8 +34,9 @@ struct iovec;					/* avoid including port/pg_iovec.h here */
 
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void fsync_pgdata(const char *pg_data, int serverVersion,
+						 DataDirSyncMethod sync_method);
+extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
 #endif
-- 
2.25.1

v9-0004-allow-syncfs-in-frontend-utilities.patchtext/x-diff; charset=us-asciiDownload
From 215eb53c8f28a8c54dd14fb8877b377fb72d7fba Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 28 Jul 2023 15:56:24 -0700
Subject: [PATCH v9 4/4] allow syncfs in frontend utilities

---
 doc/src/sgml/config.sgml              | 12 +++------
 doc/src/sgml/filelist.sgml            |  1 +
 doc/src/sgml/postgres.sgml            |  1 +
 doc/src/sgml/ref/initdb.sgml          | 22 ++++++++++++++++
 doc/src/sgml/ref/pg_basebackup.sgml   | 25 +++++++++++++++++++
 doc/src/sgml/ref/pg_checksums.sgml    | 22 ++++++++++++++++
 doc/src/sgml/ref/pg_dump.sgml         | 21 ++++++++++++++++
 doc/src/sgml/ref/pg_rewind.sgml       | 22 ++++++++++++++++
 doc/src/sgml/ref/pgupgrade.sgml       | 23 +++++++++++++++++
 doc/src/sgml/syncfs.sgml              | 36 +++++++++++++++++++++++++++
 src/bin/initdb/initdb.c               |  6 +++++
 src/bin/initdb/t/001_initdb.pl        | 12 +++++++++
 src/bin/pg_basebackup/pg_basebackup.c |  7 ++++++
 src/bin/pg_checksums/pg_checksums.c   |  6 +++++
 src/bin/pg_dump/pg_dump.c             |  7 ++++++
 src/bin/pg_rewind/pg_rewind.c         |  8 ++++++
 src/bin/pg_upgrade/option.c           | 13 ++++++++++
 src/bin/pg_upgrade/pg_upgrade.c       |  6 +++--
 src/bin/pg_upgrade/pg_upgrade.h       |  1 +
 src/fe_utils/option_utils.c           | 24 ++++++++++++++++++
 src/include/fe_utils/option_utils.h   |  4 +++
 21 files changed, 268 insertions(+), 11 deletions(-)
 create mode 100644 doc/src/sgml/syncfs.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 694d667bf9..2c7d9f1262 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -10580,15 +10580,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         On Linux, <literal>syncfs</literal> may be used instead, to ask the
         operating system to synchronize the whole file systems that contain the
         data directory, the WAL files and each tablespace (but not any other
-        file systems that may be reachable through symbolic links).  This may
-        be a lot faster than the <literal>fsync</literal> setting, because it
-        doesn't need to open each file one by one.  On the other hand, it may
-        be slower if a file system is shared by other applications that
-        modify a lot of files, since those files will also be written to disk.
-        Furthermore, on versions of Linux before 5.8, I/O errors encountered
-        while writing data to disk may not be reported to
-        <productname>PostgreSQL</productname>, and relevant error messages may
-        appear only in kernel logs.
+        file systems that may be reachable through symbolic links).  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
        </para>
        <para>
         This parameter can only be set in the
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 63b0fc2a46..df5d7c3025 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -184,6 +184,7 @@
 <!ENTITY acronyms   SYSTEM "acronyms.sgml">
 <!ENTITY glossary   SYSTEM "glossary.sgml">
 <!ENTITY color      SYSTEM "color.sgml">
+<!ENTITY syncfs     SYSTEM "syncfs.sgml">
 
 <!ENTITY features-supported   SYSTEM "features-supported.sgml">
 <!ENTITY features-unsupported SYSTEM "features-unsupported.sgml">
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 2e271862fc..f629524be0 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -294,6 +294,7 @@ break is not needed in a wider output rendering.
   &acronyms;
   &glossary;
   &color;
+  &syncfs;
   &obsolete;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 22f1011781..8a09c5c438 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -365,6 +365,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-sync-method">
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>initdb</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-sync-only">
       <term><option>-S</option></term>
       <term><option>--sync-only</option></term>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c3..d2b8ddd200 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -594,6 +594,31 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_basebackup</command> will recursively open and synchronize
+        all files in the backup directory.  When the plain format is used, the
+        search for files will follow symbolic links for the WAL directory and
+        each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        backup directory.  When the plain format is used,
+        <command>pg_basebackup</command> will also synchronize the file systems
+        that contain the WAL files and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index a3d0b0f0a3..7b44ba71cf 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -139,6 +139,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_checksums</command> will recursively open and synchronize
+        all files in the data directory.  The search for files will follow
+        symbolic links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-v</option></term>
       <term><option>--verbose</option></term>
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index a3cf0608f5..c1e2220b3c 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1179,6 +1179,27 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_dump --format=directory</command> will recursively open and
+        synchronize all files in the archive directory.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file system that contains the
+        archive directory.  See <xref linkend="syncfs"/> for more information
+        about using <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used or
+        <option>--format</option> is not set to <literal>directory</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--table-and-children=<replaceable class="parameter">pattern</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 15cddd086b..80dff16168 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,28 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_rewind</command> will recursively open and synchronize all
+        files in the data directory.  The search for files will follow symbolic
+        links for the WAL directory and each configured tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        data directory, the WAL files, and each tablespace.  See
+        <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7816b4c685..6c033485ea 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -190,6 +190,29 @@ PostgreSQL documentation
       variable <envar>PGSOCKETDIR</envar></para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--sync-method</option></term>
+      <listitem>
+       <para>
+        When set to <literal>fsync</literal>, which is the default,
+        <command>pg_upgrade</command> will recursively open and synchronize all
+        files in the upgraded cluster's data directory.  The search for files
+        will follow symbolic links for the WAL directory and each configured
+        tablespace.
+       </para>
+       <para>
+        On Linux, <literal>syncfs</literal> may be used instead to ask the
+        operating system to synchronize the whole file systems that contain the
+        upgraded cluster's data directory, its WAL files, and each tablespace.
+        See <xref linkend="syncfs"/> for more information about using
+        <function>syncfs()</function>.
+       </para>
+       <para>
+        This option has no effect when <option>--no-sync</option> is used.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-U</option> <replaceable>username</replaceable></term>
       <term><option>--username=</option><replaceable>username</replaceable></term>
diff --git a/doc/src/sgml/syncfs.sgml b/doc/src/sgml/syncfs.sgml
new file mode 100644
index 0000000000..00457d2457
--- /dev/null
+++ b/doc/src/sgml/syncfs.sgml
@@ -0,0 +1,36 @@
+<!-- doc/src/sgml/syncfs.sgml -->
+
+<appendix id="syncfs">
+ <title><function>syncfs()</function> Caveats</title>
+
+ <indexterm zone="syncfs">
+  <primary>syncfs</primary>
+ </indexterm>
+
+ <para>
+  On Linux, <function>syncfs()</function> may be specified for some
+  configuration parameters (e.g.,
+  <xref linkend="guc-recovery-init-sync-method"/>), server applications (e.g.,
+  <application>pg_upgrade</application>), and client applications (e.g.,
+  <application>pg_basebackup</application>) that involve synchronizing many
+  files to disk.  <function>syncfs()</function> is advantageous in many cases,
+  but there are some trade-offs to keep in mind.
+ </para>
+
+ <para>
+  Since <function>syncfs()</function> instructs the operating system to
+  synchronize a whole file system, it typically requires many fewer system
+  calls than using <function>fsync()</function> to synchronize each file one by
+  one.  Therefore, using <function>syncfs()</function> may be a lot faster than
+  using <function>fsync()</function>.  However, it may be slower if a file
+  system is shared by other applications that modify a lot of files, since
+  those files will also be written to disk.
+ </para>
+
+ <para>
+  Furthermore, on versions of Linux before 5.8, I/O errors encountered while
+  writing data to disk may not be reported to the calling program, and relevant
+  error messages may appear only in kernel logs.
+ </para>
+
+</appendix>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index bbea1e412b..543927b8b4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2467,6 +2467,7 @@ usage(const char *progname)
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings\n"));
+	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
 	printf(_("  -S, --sync-only           only sync database files to disk, then exit\n"));
 	printf(_("\nOther options:\n"));
 	printf(_("  -V, --version             output version information, then exit\n"));
@@ -3107,6 +3108,7 @@ main(int argc, char *argv[])
 		{"locale-provider", required_argument, NULL, 15},
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
+		{"sync-method", required_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3287,6 +3289,10 @@ main(int argc, char *argv[])
 			case 17:
 				icu_rules = pg_strdup(optarg);
 				break;
+			case 18:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 2d7469d2fc..45f96cd8bb 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -16,6 +16,7 @@ use Test::More;
 my $tempdir = PostgreSQL::Test::Utils::tempdir;
 my $xlogdir = "$tempdir/pgxlog";
 my $datadir = "$tempdir/data";
+my $supports_syncfs = check_pg_config("#define HAVE_SYNCFS 1");
 
 program_help_ok('initdb');
 program_version_ok('initdb');
@@ -82,6 +83,17 @@ command_fails([ 'pg_checksums', '-D', $datadir ],
 command_ok([ 'initdb', '-S', $datadir ], 'sync only');
 command_fails([ 'initdb', $datadir ], 'existing data directory');
 
+if ($supports_syncfs)
+{
+	command_ok([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ],
+		'sync method syncfs');
+}
+else
+{
+	command_fails([ 'initdb', '-S', $datadir, '--sync-method', 'syncfs' ],
+		'sync method syncfs');
+}
+
 # Check group access on PGDATA
 SKIP:
 {
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1a6eacf6d5..977c793a67 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -425,6 +425,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --sync-method=METHOD\n"
+			 "                         set method for syncing files to disk\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2282,6 +2284,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"sync-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2453,6 +2456,10 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 123450f483..9fb2c5b3c7 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -78,6 +78,7 @@ usage(void)
 	printf(_("  -f, --filenode=FILENODE  check only relation with specified filenode\n"));
 	printf(_("  -N, --no-sync            do not wait for changes to be written safely to disk\n"));
 	printf(_("  -P, --progress           show progress information\n"));
+	printf(_("      --sync-method=METHOD set method for syncing files to disk\n"));
 	printf(_("  -v, --verbose            output verbose messages\n"));
 	printf(_("  -V, --version            output version information, then exit\n"));
 	printf(_("  -?, --help               show this help, then exit\n"));
@@ -436,6 +437,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
+		{"sync-method", required_argument, NULL, 1},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -494,6 +496,10 @@ main(int argc, char *argv[])
 			case 'v':
 				verbose = true;
 				break;
+			case 1:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 39a468b131..dc4a28e81e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -432,6 +432,7 @@ main(int argc, char **argv)
 		{"table-and-children", required_argument, NULL, 12},
 		{"exclude-table-and-children", required_argument, NULL, 13},
 		{"exclude-table-data-and-children", required_argument, NULL, 14},
+		{"sync-method", required_argument, NULL, 15},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -658,6 +659,11 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 15:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit_nicely(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -1069,6 +1075,7 @@ help(const char *progname)
 			 "                               compress as specified\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT  fail after waiting TIMEOUT for a table lock\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
+	printf(_("  --sync-method=METHOD         set method for syncing files to disk\n"));
 	printf(_("  -?, --help                   show this help, then exit\n"));
 
 	printf(_("\nOptions controlling the output content:\n"));
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index bdfacf3263..bfd44a284e 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -22,6 +22,7 @@
 #include "common/file_perm.h"
 #include "common/restricted_token.h"
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "fe_utils/recovery_gen.h"
 #include "fe_utils/string_utils.h"
 #include "file_ops.h"
@@ -108,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("      --sync-method=METHOD       set method for syncing files to disk\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -132,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"sync-method", required_argument, NULL, 6},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -219,6 +222,11 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 6:
+				if (!parse_sync_method(optarg, &sync_method))
+					exit(1);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 640361009e..b9d900d0db 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -14,6 +14,7 @@
 #endif
 
 #include "common/string.h"
+#include "fe_utils/option_utils.h"
 #include "getopt_long.h"
 #include "pg_upgrade.h"
 #include "utils/pidfile.h"
@@ -57,12 +58,14 @@ parseCommandLine(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
 		{"copy", no_argument, NULL, 2},
+		{"sync-method", required_argument, NULL, 3},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
+	DataDirSyncMethod unused;
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -199,6 +202,12 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_COPY;
 				break;
 
+			case 3:
+				if (!parse_sync_method(optarg, &unused))
+					exit(1);
+				user_opts.sync_method = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -209,6 +218,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")", argv[optind]);
 
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");
+
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode");
 
@@ -289,6 +301,7 @@ usage(void)
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
+	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4562dafcff..96bfb67167 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -192,8 +192,10 @@ main(int argc, char **argv)
 	{
 		prep_status("Sync data directory to disk");
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
-				  new_cluster.pgdata);
+				  "\"%s/initdb\" --sync-only \"%s\" --sync-method %s",
+				  new_cluster.bindir,
+				  new_cluster.pgdata,
+				  user_opts.sync_method);
 		check_ok();
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 7afa96716e..842f3b6cd3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -304,6 +304,7 @@ typedef struct
 	transferMode transfer_mode; /* copy files or link them? */
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
+	char	   *sync_method;
 } UserOpts;
 
 typedef struct
diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c
index 763c991015..36ac689964 100644
--- a/src/fe_utils/option_utils.c
+++ b/src/fe_utils/option_utils.c
@@ -82,3 +82,27 @@ option_parse_int(const char *optarg, const char *optname,
 		*result = val;
 	return true;
 }
+
+bool
+parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+	else if (strcmp(optarg, "syncfs") == 0)
+	{
+#ifdef HAVE_SYNCFS
+		*sync_method = DATA_DIR_SYNC_METHOD_SYNCFS;
+#else
+		pg_log_error("this build does not support sync method \"%s\"",
+					 "syncfs");
+		return false;
+#endif
+	}
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}
+
+	return true;
+}
diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h
index b7b0654cee..6f3a965916 100644
--- a/src/include/fe_utils/option_utils.h
+++ b/src/include/fe_utils/option_utils.h
@@ -14,6 +14,8 @@
 
 #include "postgres_fe.h"
 
+#include "common/file_utils.h"
+
 typedef void (*help_handler) (const char *progname);
 
 extern void handle_help_version_opts(int argc, char *argv[],
@@ -22,5 +24,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
 extern bool option_parse_int(const char *optarg, const char *optname,
 							 int min_range, int max_range,
 							 int *result);
+extern bool parse_sync_method(const char *optarg,
+							  DataDirSyncMethod *sync_method);
 
 #endif							/* OPTION_UTILS_H */
-- 
2.25.1

#33Justin Pryzby
pryzby@telsasoft.com
In reply to: Nathan Bossart (#7)
Re: should frontend tools use syncfs() ?
+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");

why pg_strdup?

+parse_sync_method(const char *optarg, SyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = SYNC_METHOD_FSYNC;
+#ifdef HAVE_SYNCFS
+	else if (strcmp(optarg, "syncfs") == 0)
+		*sync_method = SYNC_METHOD_SYNCFS;
+#endif
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}

This should probably give a distinct error when syncfs is not supported
than when it's truly unrecognized.

The patch should handle pg_dumpall, too.

Note that /bin/sync doesn't try to de-duplicate, it does just what you
tell it.

$ strace -e openat,syncfs,fsync sync / / / -f
...
openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK) = 3
syncfs(3)                               = 0
openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK) = 3
syncfs(3)                               = 0
openat(AT_FDCWD, "/", O_RDONLY|O_NONBLOCK) = 3
syncfs(3)                               = 0
+++ exited with 0 +++
#34Nathan Bossart
nathandbossart@gmail.com
In reply to: Justin Pryzby (#33)
Re: should frontend tools use syncfs() ?

Thanks for taking a look.

On Fri, Sep 01, 2023 at 12:58:10PM -0500, Justin Pryzby wrote:

+	if (!user_opts.sync_method)
+		user_opts.sync_method = pg_strdup("fsync");

why pg_strdup?

I believe I was just following the precedent set by some of the other
options.

+parse_sync_method(const char *optarg, SyncMethod *sync_method)
+{
+	if (strcmp(optarg, "fsync") == 0)
+		*sync_method = SYNC_METHOD_FSYNC;
+#ifdef HAVE_SYNCFS
+	else if (strcmp(optarg, "syncfs") == 0)
+		*sync_method = SYNC_METHOD_SYNCFS;
+#endif
+	else
+	{
+		pg_log_error("unrecognized sync method: %s", optarg);
+		return false;
+	}

This should probably give a distinct error when syncfs is not supported
than when it's truly unrecognized.

Later versions of the patch should have this.

The patch should handle pg_dumpall, too.

It looks like pg_dumpall only ever fsyncs a single file, so I don't think
it is really needed there.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#35Justin Pryzby
pryzby@telsasoft.com
In reply to: Nathan Bossart (#34)
Re: should frontend tools use syncfs() ?

On Fri, Sep 01, 2023 at 11:08:51AM -0700, Nathan Bossart wrote:

This should probably give a distinct error when syncfs is not supported
than when it's truly unrecognized.

Later versions of the patch should have this.

Oops, right.

The patch should handle pg_dumpall, too.

It looks like pg_dumpall only ever fsyncs a single file, so I don't think
it is really needed there.

What about (per git grep no-sync doc) pg_receivewal?

--
Justin

#36Nathan Bossart
nathandbossart@gmail.com
In reply to: Justin Pryzby (#35)
Re: should frontend tools use syncfs() ?

On Fri, Sep 01, 2023 at 01:19:13PM -0500, Justin Pryzby wrote:

What about (per git grep no-sync doc) pg_receivewal?

I don't think it's applicable there, either. IIUC that option specifies
whether to sync the data as it is streamed over.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#37Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#36)
Re: should frontend tools use syncfs() ?

I've committed 0001 for now. I plan to commit the rest in the next couple
of days.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#38Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#37)
Re: should frontend tools use syncfs() ?

On Tue, Sep 05, 2023 at 05:08:53PM -0700, Nathan Bossart wrote:

I've committed 0001 for now. I plan to commit the rest in the next couple
of days.

Committed.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#39Maxim Orlov
orlovmg@gmail.com
In reply to: Nathan Bossart (#38)
2 attachment(s)
Re: should frontend tools use syncfs() ?

On Thu, 7 Sept 2023 at 03:34, Nathan Bossart <nathandbossart@gmail.com>
wrote:

Committed.

Hi! Great job!

But here is one problem I've encountered while working on some unrelated
stuff.
Now we have two different things called by the same name – sync_method. One in
xlog:
int sync_method = DEFAULT_SYNC_METHOD;
...and another one in "bins":
static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;

With the current include order, this is not a problem, but imagine you add a
couple of new includes,
for example:
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -18,6 +18,8 @@
 #include "storage/block.h"
 #include "storage/item.h"
 #include "storage/off.h"
+#include "postgres.h"
+#include "utils/rel.h"

And the build will be broken, because we now have two different things called
"sync_method" with
different types:
In file included from .../src/bin/pg_rewind/pg_rewind.c:33:
In file included from .../src/include/storage/bufpage.h:22:
In file included from .../src/include/utils/rel.h:18:
.../src/include/access/xlog.h:27:24: error: redeclaration of 'sync_method'
with a different type: 'int' vs 'DataDirSyncMethod' (aka 'enum
DataDirSyncMethod')
extern PGDLLIMPORT int sync_method;
...

As a solution, I suggest renaming sync_method in the xlog module to
wal_sync_method. In fact,
the corresponding GUC for this variable is called "wal_sync_method", and I see no
reason not to use
the exact same name for the variable in the xlog module.

--
Best regards,
Maxim Orlov.

Attachments:

reproduce.diff.txttext/plain; charset=US-ASCII; name=reproduce.diff.txtDownload
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 424ecba028f..3aa1fc60cb4 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -18,6 +18,8 @@
 #include "storage/block.h"
 #include "storage/item.h"
 #include "storage/off.h"
+#include "postgres.h"
+#include "utils/rel.h"
 
 /*
  * A postgres disk page is an abstraction layered on top of a postgres
v10-0001-Fix-conflicting-types-for-sync_method.patchapplication/octet-stream; name=v10-0001-Fix-conflicting-types-for-sync_method.patchDownload
From bcf744b93d31ae42ffb4122b8d16fddb3e5391d2 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 20 Sep 2023 14:57:46 +0300
Subject: [PATCH v10] Fix conflicting types for sync_method.

Rename xlog's sync_method in order to:
- avoid conflicting types error for sync_method if include order is changed;
- be consistent with GUC name.

See also:
3ed19567198ddb34ab32be6ebdf132148287abdf
cccc6cdeb32f010f1cf777a9e9a85344a4317ab8
8c16ad3b43299695f203f9157a2b27c22b9ed634
---
 src/backend/access/transam/xlog.c   | 31 ++++++++++++++++-------------
 src/backend/storage/file/fd.c       |  2 +-
 src/backend/utils/misc/guc_tables.c |  6 +++---
 src/include/access/xlog.h           |  2 +-
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fcbde10529b..4ccb62e4c2a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -130,7 +130,7 @@ bool	   *wal_consistency_checking = NULL;
 bool		wal_init_zero = true;
 bool		wal_recycle = true;
 bool		log_checkpoints = true;
-int			sync_method = DEFAULT_SYNC_METHOD;
+int			wal_sync_method = DEFAULT_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_REPLICA;
 int			CommitDelay = 0;	/* precommit delay in microseconds */
 int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
@@ -171,7 +171,7 @@ static bool check_wal_consistency_checking_deferred = false;
 /*
  * GUC support
  */
-const struct config_enum_entry sync_method_options[] = {
+const struct config_enum_entry wal_sync_method_options[] = {
 	{"fsync", SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_FSYNC_WRITETHROUGH
 	{"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
@@ -2328,8 +2328,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		 * have no open file or the wrong one.  However, we do not need to
 		 * fsync more than one file.
 		 */
-		if (sync_method != SYNC_METHOD_OPEN &&
-			sync_method != SYNC_METHOD_OPEN_DSYNC)
+		if (wal_sync_method != SYNC_METHOD_OPEN &&
+			wal_sync_method != SYNC_METHOD_OPEN_DSYNC)
 		{
 			if (openLogFile >= 0 &&
 				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
@@ -2959,7 +2959,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	 */
 	*added = false;
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 	{
 		if (errno != ENOENT)
@@ -3124,7 +3124,7 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
 
 	/* Now open original target segment (might not be file I just made) */
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -3356,7 +3356,7 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
 	XLogFilePath(path, tli, segno, wal_segment_size);
 
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
@@ -8141,9 +8141,9 @@ get_sync_bit(int method)
  * GUC support
  */
 void
-assign_xlog_sync_method(int new_sync_method, void *extra)
+assign_xlog_sync_method(int new_wal_sync_method, void *extra)
 {
-	if (sync_method != new_sync_method)
+	if (wal_sync_method != new_wal_sync_method)
 	{
 		/*
 		 * To ensure that no blocks escape unsynced, force an fsync on the
@@ -8169,8 +8169,11 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 			}
 
 			pgstat_report_wait_end();
-			if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
+			if (get_sync_bit(wal_sync_method) !=
+				get_sync_bit(new_wal_sync_method))
+			{
 				XLogFileClose();
+			}
 		}
 	}
 }
@@ -8195,8 +8198,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	 * file.
 	 */
 	if (!enableFsync ||
-		sync_method == SYNC_METHOD_OPEN ||
-		sync_method == SYNC_METHOD_OPEN_DSYNC)
+		wal_sync_method == SYNC_METHOD_OPEN ||
+		wal_sync_method == SYNC_METHOD_OPEN_DSYNC)
 		return;
 
 	/* Measure I/O timing to sync the WAL file */
@@ -8206,7 +8209,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		INSTR_TIME_SET_ZERO(start);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
-	switch (sync_method)
+	switch (wal_sync_method)
 	{
 		case SYNC_METHOD_FSYNC:
 			if (pg_fsync_no_writethrough(fd) != 0)
@@ -8228,7 +8231,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 			Assert(false);
 			break;
 		default:
-			elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
+			elog(PANIC, "unrecognized wal_sync_method: %d", wal_sync_method);
 			break;
 	}
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 3fed475c381..2f8737fa426 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,7 +400,7 @@ pg_fsync(int fd)
 
 	/* #if is to skip the sync_method test if there's no need for it */
 #if defined(HAVE_FSYNC_WRITETHROUGH)
-	if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
+	if (wal_sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
 		return pg_fsync_writethrough(fd);
 	else
 #endif
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index bdb26e2b77d..12639a6cf6d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -484,7 +484,7 @@ static const struct config_enum_entry wal_compression_options[] = {
 extern const struct config_enum_entry wal_level_options[];
 extern const struct config_enum_entry archive_mode_options[];
 extern const struct config_enum_entry recovery_target_action_options[];
-extern const struct config_enum_entry sync_method_options[];
+extern const struct config_enum_entry wal_sync_method_options[];
 extern const struct config_enum_entry dynamic_shared_memory_options[];
 
 /*
@@ -4832,8 +4832,8 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Selects the method used for forcing WAL updates to disk."),
 			NULL
 		},
-		&sync_method,
-		DEFAULT_SYNC_METHOD, sync_method_options,
+		&wal_sync_method,
+		DEFAULT_SYNC_METHOD, wal_sync_method_options,
 		NULL, assign_xlog_sync_method, NULL
 	},
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca8523810..4c3beea36c4 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -24,7 +24,7 @@
 #define SYNC_METHOD_OPEN		2	/* for O_SYNC */
 #define SYNC_METHOD_FSYNC_WRITETHROUGH	3
 #define SYNC_METHOD_OPEN_DSYNC	4	/* for O_DSYNC */
-extern PGDLLIMPORT int sync_method;
+extern PGDLLIMPORT int wal_sync_method;
 
 extern PGDLLIMPORT XLogRecPtr ProcLastRecPtr;
 extern PGDLLIMPORT XLogRecPtr XactLastRecEnd;
-- 
2.41.0

#40Nathan Bossart
nathandbossart@gmail.com
In reply to: Maxim Orlov (#39)
Re: should frontend tools use syncfs() ?

On Wed, Sep 20, 2023 at 03:12:56PM +0300, Maxim Orlov wrote:

As a solution, I suggest renaming sync_method in the xlog module to
wal_sync_method. In fact,
the corresponding GUC for this variable is called "wal_sync_method", and I see no
reason not to use
the exact same name for the variable in the xlog module.

+1

I think we should also consider renaming things like SYNC_METHOD_FSYNC to
WAL_SYNC_METHOD_FSYNC, and sync_method_options to wal_sync_method_options.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#41Maxim Orlov
orlovmg@gmail.com
In reply to: Nathan Bossart (#40)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Wed, 20 Sept 2023 at 22:08, Nathan Bossart <nathandbossart@gmail.com>
wrote:

I think we should also consider renaming things like SYNC_METHOD_FSYNC to
WAL_SYNC_METHOD_FSYNC, and sync_method_options to wal_sync_method_options.

I've already renamed sync_method_options in the previous patch.
34 @@ -171,7 +171,7 @@ static bool check_wal_consistency_checking_deferred
= false;
35 /*
36 * GUC support
37 */
38 -const struct config_enum_entry sync_method_options[] = {
39 +const struct config_enum_entry wal_sync_method_options[] = {

As for the SYNC_METHOD_FSYNC rename, please find the patch attached.
It also turns the WAL sync methods into an enum instead of defines.

--
Best regards,
Maxim Orlov.

Attachments:

v11-0001-Fix-conflicting-types-for-sync_method.patchapplication/octet-stream; name=v11-0001-Fix-conflicting-types-for-sync_method.patchDownload
From b51f1e306a7f724082ee014e35f09709dd288a35 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 20 Sep 2023 14:57:46 +0300
Subject: [PATCH v11] Fix conflicting types for sync_method.

Rename xlog's sync_method in order to:
- avoid conflicting types error for sync_method if include order is changed;
- be consistent with GUC name.

Also:
- use enum for WAL sync methods;
- rename WAL sync methods (add WAL_ prefix).

See also:
3ed19567198ddb34ab32be6ebdf132148287abdf
cccc6cdeb32f010f1cf777a9e9a85344a4317ab8
8c16ad3b43299695f203f9157a2b27c22b9ed634
---
 src/backend/access/transam/xlog.c   | 61 +++++++++++++++--------------
 src/backend/storage/file/fd.c       |  2 +-
 src/backend/utils/misc/guc_tables.c |  6 +--
 src/include/access/xlog.h           | 17 ++++----
 src/include/access/xlogdefs.h       |  8 ++--
 src/include/port/freebsd.h          |  2 +-
 src/include/port/linux.h            |  2 +-
 7 files changed, 52 insertions(+), 46 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fcbde10529b..f31819434a4 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -130,7 +130,7 @@ bool	   *wal_consistency_checking = NULL;
 bool		wal_init_zero = true;
 bool		wal_recycle = true;
 bool		log_checkpoints = true;
-int			sync_method = DEFAULT_SYNC_METHOD;
+int			wal_sync_method = DEFAULT_WAL_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_REPLICA;
 int			CommitDelay = 0;	/* precommit delay in microseconds */
 int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
@@ -171,17 +171,17 @@ static bool check_wal_consistency_checking_deferred = false;
 /*
  * GUC support
  */
-const struct config_enum_entry sync_method_options[] = {
-	{"fsync", SYNC_METHOD_FSYNC, false},
+const struct config_enum_entry wal_sync_method_options[] = {
+	{"fsync", WAL_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_FSYNC_WRITETHROUGH
-	{"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
+	{"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
 #endif
-	{"fdatasync", SYNC_METHOD_FDATASYNC, false},
+	{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
 #ifdef O_SYNC
-	{"open_sync", SYNC_METHOD_OPEN, false},
+	{"open_sync", WAL_SYNC_METHOD_OPEN, false},
 #endif
 #ifdef O_DSYNC
-	{"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
+	{"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
 #endif
 	{NULL, 0, false}
 };
@@ -2328,8 +2328,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		 * have no open file or the wrong one.  However, we do not need to
 		 * fsync more than one file.
 		 */
-		if (sync_method != SYNC_METHOD_OPEN &&
-			sync_method != SYNC_METHOD_OPEN_DSYNC)
+		if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
+			wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
 		{
 			if (openLogFile >= 0 &&
 				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
@@ -2959,7 +2959,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	 */
 	*added = false;
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 	{
 		if (errno != ENOENT)
@@ -3124,7 +3124,7 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
 
 	/* Now open original target segment (might not be file I just made) */
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -3356,7 +3356,7 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
 	XLogFilePath(path, tli, segno, wal_segment_size);
 
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
@@ -8118,16 +8118,16 @@ get_sync_bit(int method)
 			 * not included in the enum option array, and therefore will never
 			 * be seen here.
 			 */
-		case SYNC_METHOD_FSYNC:
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			return o_direct_flag;
 #ifdef O_SYNC
-		case SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN:
 			return O_SYNC | o_direct_flag;
 #endif
 #ifdef O_DSYNC
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			return O_DSYNC | o_direct_flag;
 #endif
 		default:
@@ -8141,9 +8141,9 @@ get_sync_bit(int method)
  * GUC support
  */
 void
-assign_xlog_sync_method(int new_sync_method, void *extra)
+assign_xlog_sync_method(int new_wal_sync_method, void *extra)
 {
-	if (sync_method != new_sync_method)
+	if (wal_sync_method != new_wal_sync_method)
 	{
 		/*
 		 * To ensure that no blocks escape unsynced, force an fsync on the
@@ -8169,8 +8169,11 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 			}
 
 			pgstat_report_wait_end();
-			if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
+			if (get_sync_bit(wal_sync_method) !=
+				get_sync_bit(new_wal_sync_method))
+			{
 				XLogFileClose();
+			}
 		}
 	}
 }
@@ -8195,8 +8198,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	 * file.
 	 */
 	if (!enableFsync ||
-		sync_method == SYNC_METHOD_OPEN ||
-		sync_method == SYNC_METHOD_OPEN_DSYNC)
+		wal_sync_method == WAL_SYNC_METHOD_OPEN ||
+		wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
 		return;
 
 	/* Measure I/O timing to sync the WAL file */
@@ -8206,29 +8209,29 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		INSTR_TIME_SET_ZERO(start);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
-	switch (sync_method)
+	switch (wal_sync_method)
 	{
-		case SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC:
 			if (pg_fsync_no_writethrough(fd) != 0)
 				msg = _("could not fsync file \"%s\": %m");
 			break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
 			if (pg_fsync_writethrough(fd) != 0)
 				msg = _("could not fsync write-through file \"%s\": %m");
 			break;
 #endif
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			if (pg_fdatasync(fd) != 0)
 				msg = _("could not fdatasync file \"%s\": %m");
 			break;
-		case SYNC_METHOD_OPEN:
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			/* not reachable */
 			Assert(false);
 			break;
 		default:
-			elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
+			elog(PANIC, "unrecognized wal_sync_method: %d", wal_sync_method);
 			break;
 	}
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 3fed475c381..f5412214b69 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,7 +400,7 @@ pg_fsync(int fd)
 
 	/* #if is to skip the sync_method test if there's no need for it */
 #if defined(HAVE_FSYNC_WRITETHROUGH)
-	if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
+	if (wal_sync_method == WAL_SYNC_METHOD_FSYNC_WRITETHROUGH)
 		return pg_fsync_writethrough(fd);
 	else
 #endif
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index bdb26e2b77d..3df821a958e 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -484,7 +484,7 @@ static const struct config_enum_entry wal_compression_options[] = {
 extern const struct config_enum_entry wal_level_options[];
 extern const struct config_enum_entry archive_mode_options[];
 extern const struct config_enum_entry recovery_target_action_options[];
-extern const struct config_enum_entry sync_method_options[];
+extern const struct config_enum_entry wal_sync_method_options[];
 extern const struct config_enum_entry dynamic_shared_memory_options[];
 
 /*
@@ -4832,8 +4832,8 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Selects the method used for forcing WAL updates to disk."),
 			NULL
 		},
-		&sync_method,
-		DEFAULT_SYNC_METHOD, sync_method_options,
+		&wal_sync_method,
+		DEFAULT_WAL_SYNC_METHOD, wal_sync_method_options,
 		NULL, assign_xlog_sync_method, NULL
 	},
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca8523810..d82c7f7c040 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -18,13 +18,16 @@
 #include "nodes/pg_list.h"
 
 
-/* Sync methods */
-#define SYNC_METHOD_FSYNC		0
-#define SYNC_METHOD_FDATASYNC	1
-#define SYNC_METHOD_OPEN		2	/* for O_SYNC */
-#define SYNC_METHOD_FSYNC_WRITETHROUGH	3
-#define SYNC_METHOD_OPEN_DSYNC	4	/* for O_DSYNC */
-extern PGDLLIMPORT int sync_method;
+/* WAL sync methods */
+typedef enum WalSyncMethod
+{
+	WAL_SYNC_METHOD_FSYNC = 0,
+	WAL_SYNC_METHOD_FDATASYNC,
+	WAL_SYNC_METHOD_OPEN,		/* for O_SYNC */
+	WAL_SYNC_METHOD_FSYNC_WRITETHROUGH,
+	WAL_SYNC_METHOD_OPEN_DSYNC,	/* for O_DSYNC */
+} WalSyncMethod;
+extern PGDLLIMPORT int wal_sync_method;
 
 extern PGDLLIMPORT XLogRecPtr ProcLastRecPtr;
 extern PGDLLIMPORT XLogRecPtr XactLastRecEnd;
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index fe794c77405..5f54c2816bc 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -71,12 +71,12 @@ typedef uint16 RepOriginId;
  *
  * Note that we define our own O_DSYNC on Windows, but not O_SYNC.
  */
-#if defined(PLATFORM_DEFAULT_SYNC_METHOD)
-#define DEFAULT_SYNC_METHOD		PLATFORM_DEFAULT_SYNC_METHOD
+#if defined(PLATFORM_DEFAULT_WAL_SYNC_METHOD)
+#define DEFAULT_WAL_SYNC_METHOD		PLATFORM_DEFAULT_WAL_SYNC_METHOD
 #elif defined(O_DSYNC) && (!defined(O_SYNC) || O_DSYNC != O_SYNC)
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_OPEN_DSYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_OPEN_DSYNC
 #else
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_FDATASYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_FDATASYNC
 #endif
 
 #endif							/* XLOG_DEFS_H */
diff --git a/src/include/port/freebsd.h b/src/include/port/freebsd.h
index 0e3fde55d6d..c604187acdd 100644
--- a/src/include/port/freebsd.h
+++ b/src/include/port/freebsd.h
@@ -5,4 +5,4 @@
  * would prefer open_datasync on FreeBSD 13+, but that is not a good choice on
  * many systems.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
diff --git a/src/include/port/linux.h b/src/include/port/linux.h
index 7a6e46cdbb7..8101af2b93f 100644
--- a/src/include/port/linux.h
+++ b/src/include/port/linux.h
@@ -19,4 +19,4 @@
  * perform better and (b) causes outright failures on ext4 data=journal
  * filesystems, because those don't support O_DIRECT.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
-- 
2.41.0

#42Peter Eisentraut
peter@eisentraut.org
In reply to: Michael Paquier (#16)
Re: should frontend tools use syncfs() ?

On 17.08.23 04:50, Michael Paquier wrote:

Yeah, this crossed my mind. Do you know of any existing examples of
options with links to a common section? One problem with this approach is
that there are small differences in the wording for some of the frontend
utilities, so it might be difficult to cleanly unite these sections.

The closest thing I can think of is Color Support in section
Appendixes, that describes something shared across a lot of binaries
(that would be 6 tools with this patch).

I think it's a bit much to add a whole appendix for that little content.

We have a collection of platform-specific notes in chapter 19, including
file-system-related notes in section 19.2. Maybe it could be put there?

#43Nathan Bossart
nathandbossart@gmail.com
In reply to: Peter Eisentraut (#42)
Re: should frontend tools use syncfs() ?

On Wed, Sep 27, 2023 at 01:56:08PM +0100, Peter Eisentraut wrote:

I think it's a bit much to add a whole appendix for that little content.

I'm inclined to agree.

We have a collection of platform-specific notes in chapter 19, including
file-system-related notes in section 19.2. Maybe it could be put there?

I will give this a try.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#44Maxim Orlov
orlovmg@gmail.com
In reply to: Nathan Bossart (#43)
Re: should frontend tools use syncfs() ?

Back to the patch v11. I don't understand a bit, what we should do next?
Make a separate thread or put this one on commitfest?

--
Best regards,
Maxim Orlov.

#45Nathan Bossart
nathandbossart@gmail.com
In reply to: Maxim Orlov (#44)
Re: should frontend tools use syncfs() ?

On Fri, Oct 06, 2023 at 10:50:11AM +0300, Maxim Orlov wrote:

Back to the patch v11. I don't understand a bit, what we should do next?
Make a separate thread or put this one on commitfest?

From a quick skim, this one looks pretty good to me. Would you mind adding
it to the commitfest so that it doesn't get lost? I will aim to take a
closer look at it next week.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#46Maxim Orlov
orlovmg@gmail.com
In reply to: Nathan Bossart (#45)
Re: should frontend tools use syncfs() ?

On Fri, 6 Oct 2023 at 22:35, Nathan Bossart <nathandbossart@gmail.com>
wrote:

From a quick skim, this one looks pretty good to me. Would you mind adding
it to the commitfest so that it doesn't get lost? I will aim to take a
closer look at it next week.

Sounds good, thanks a lot!

https://commitfest.postgresql.org/45/4609/

--
Best regards,
Maxim Orlov.

#47Nathan Bossart
nathandbossart@gmail.com
In reply to: Maxim Orlov (#46)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Mon, Oct 09, 2023 at 01:12:16PM +0300, Maxim Orlov wrote:

On Fri, 6 Oct 2023 at 22:35, Nathan Bossart <nathandbossart@gmail.com>
wrote:

From a quick skim, this one looks pretty good to me. Would you mind adding
it to the commitfest so that it doesn't get lost? I will aim to take a
closer look at it next week.

Sounds good, thanks a lot!

https://commitfest.postgresql.org/45/4609/

Thanks. I've made a couple of small changes, but otherwise I think this
one is just about ready.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v12-0001-Align-names-of-variables-etc.-in-wal_sync_method.patchtext/x-diff; charset=us-asciiDownload
From 481efed2b47ab1cf37308a789698069d1a84561b Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 9 Oct 2023 11:07:29 -0500
Subject: [PATCH v12 1/1] Align names of variables, etc. in
 wal_sync_method-related code.

Author: Maxim Orlov
Discussion: https://postgr.es/m/CACG%3DezbL1gwE7_K7sr9uqaCGkWhmvRTcTEnm3%2BX1xsRNwbXULQ%40mail.gmail.com
---
 src/backend/access/transam/xlog.c   | 58 ++++++++++++++---------------
 src/backend/storage/file/fd.c       |  4 +-
 src/backend/utils/misc/guc_tables.c |  8 ++--
 src/include/access/xlog.h           | 15 +++++---
 src/include/access/xlogdefs.h       |  8 ++--
 src/include/port/freebsd.h          |  2 +-
 src/include/port/linux.h            |  2 +-
 src/include/utils/guc_hooks.h       |  2 +-
 src/tools/pgindent/typedefs.list    |  1 +
 9 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fcbde10529..a0acd67772 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -130,7 +130,7 @@ bool	   *wal_consistency_checking = NULL;
 bool		wal_init_zero = true;
 bool		wal_recycle = true;
 bool		log_checkpoints = true;
-int			sync_method = DEFAULT_SYNC_METHOD;
+int			wal_sync_method = DEFAULT_WAL_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_REPLICA;
 int			CommitDelay = 0;	/* precommit delay in microseconds */
 int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
@@ -171,17 +171,17 @@ static bool check_wal_consistency_checking_deferred = false;
 /*
  * GUC support
  */
-const struct config_enum_entry sync_method_options[] = {
-	{"fsync", SYNC_METHOD_FSYNC, false},
+const struct config_enum_entry wal_sync_method_options[] = {
+	{"fsync", WAL_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_FSYNC_WRITETHROUGH
-	{"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
+	{"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
 #endif
-	{"fdatasync", SYNC_METHOD_FDATASYNC, false},
+	{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
 #ifdef O_SYNC
-	{"open_sync", SYNC_METHOD_OPEN, false},
+	{"open_sync", WAL_SYNC_METHOD_OPEN, false},
 #endif
 #ifdef O_DSYNC
-	{"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
+	{"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
 #endif
 	{NULL, 0, false}
 };
@@ -2328,8 +2328,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		 * have no open file or the wrong one.  However, we do not need to
 		 * fsync more than one file.
 		 */
-		if (sync_method != SYNC_METHOD_OPEN &&
-			sync_method != SYNC_METHOD_OPEN_DSYNC)
+		if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
+			wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
 		{
 			if (openLogFile >= 0 &&
 				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
@@ -2959,7 +2959,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	 */
 	*added = false;
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 	{
 		if (errno != ENOENT)
@@ -3124,7 +3124,7 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
 
 	/* Now open original target segment (might not be file I just made) */
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -3356,7 +3356,7 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
 	XLogFilePath(path, tli, segno, wal_segment_size);
 
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
@@ -8118,16 +8118,16 @@ get_sync_bit(int method)
 			 * not included in the enum option array, and therefore will never
 			 * be seen here.
 			 */
-		case SYNC_METHOD_FSYNC:
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			return o_direct_flag;
 #ifdef O_SYNC
-		case SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN:
 			return O_SYNC | o_direct_flag;
 #endif
 #ifdef O_DSYNC
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			return O_DSYNC | o_direct_flag;
 #endif
 		default:
@@ -8141,9 +8141,9 @@ get_sync_bit(int method)
  * GUC support
  */
 void
-assign_xlog_sync_method(int new_sync_method, void *extra)
+assign_wal_sync_method(int new_wal_sync_method, void *extra)
 {
-	if (sync_method != new_sync_method)
+	if (wal_sync_method != new_wal_sync_method)
 	{
 		/*
 		 * To ensure that no blocks escape unsynced, force an fsync on the
@@ -8169,7 +8169,7 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 			}
 
 			pgstat_report_wait_end();
-			if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
+			if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
 				XLogFileClose();
 		}
 	}
@@ -8195,8 +8195,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	 * file.
 	 */
 	if (!enableFsync ||
-		sync_method == SYNC_METHOD_OPEN ||
-		sync_method == SYNC_METHOD_OPEN_DSYNC)
+		wal_sync_method == WAL_SYNC_METHOD_OPEN ||
+		wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
 		return;
 
 	/* Measure I/O timing to sync the WAL file */
@@ -8206,29 +8206,29 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		INSTR_TIME_SET_ZERO(start);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
-	switch (sync_method)
+	switch (wal_sync_method)
 	{
-		case SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC:
 			if (pg_fsync_no_writethrough(fd) != 0)
 				msg = _("could not fsync file \"%s\": %m");
 			break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
 			if (pg_fsync_writethrough(fd) != 0)
 				msg = _("could not fsync write-through file \"%s\": %m");
 			break;
 #endif
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			if (pg_fdatasync(fd) != 0)
 				msg = _("could not fdatasync file \"%s\": %m");
 			break;
-		case SYNC_METHOD_OPEN:
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			/* not reachable */
 			Assert(false);
 			break;
 		default:
-			elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
+			elog(PANIC, "unrecognized wal_sync_method: %d", wal_sync_method);
 			break;
 	}
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 3fed475c38..f0ae034207 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -398,9 +398,9 @@ pg_fsync(int fd)
 	errno = 0;
 #endif
 
-	/* #if is to skip the sync_method test if there's no need for it */
+	/* #if is to skip the wal_sync_method test if there's no need for it */
 #if defined(HAVE_FSYNC_WRITETHROUGH)
-	if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
+	if (wal_sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
 		return pg_fsync_writethrough(fd);
 	else
 #endif
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 16ec6c5ef0..4c58574166 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -485,7 +485,7 @@ static const struct config_enum_entry wal_compression_options[] = {
 extern const struct config_enum_entry wal_level_options[];
 extern const struct config_enum_entry archive_mode_options[];
 extern const struct config_enum_entry recovery_target_action_options[];
-extern const struct config_enum_entry sync_method_options[];
+extern const struct config_enum_entry wal_sync_method_options[];
 extern const struct config_enum_entry dynamic_shared_memory_options[];
 
 /*
@@ -4843,9 +4843,9 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Selects the method used for forcing WAL updates to disk."),
 			NULL
 		},
-		&sync_method,
-		DEFAULT_SYNC_METHOD, sync_method_options,
-		NULL, assign_xlog_sync_method, NULL
+		&wal_sync_method,
+		DEFAULT_WAL_SYNC_METHOD, wal_sync_method_options,
+		NULL, assign_wal_sync_method, NULL
 	},
 
 	{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca852381..4ad572cb87 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -19,12 +19,15 @@
 
 
 /* Sync methods */
-#define SYNC_METHOD_FSYNC		0
-#define SYNC_METHOD_FDATASYNC	1
-#define SYNC_METHOD_OPEN		2	/* for O_SYNC */
-#define SYNC_METHOD_FSYNC_WRITETHROUGH	3
-#define SYNC_METHOD_OPEN_DSYNC	4	/* for O_DSYNC */
-extern PGDLLIMPORT int sync_method;
+typedef enum WalSyncMethod
+{
+	WAL_SYNC_METHOD_FSYNC = 0,
+	WAL_SYNC_METHOD_FDATASYNC,
+	WAL_SYNC_METHOD_OPEN,		/* for O_SYNC */
+	WAL_SYNC_METHOD_FSYNC_WRITETHROUGH,
+	WAL_SYNC_METHOD_OPEN_DSYNC	/* for O_DSYNC */
+} WalSyncMethod;
+extern PGDLLIMPORT int wal_sync_method;
 
 extern PGDLLIMPORT XLogRecPtr ProcLastRecPtr;
 extern PGDLLIMPORT XLogRecPtr XactLastRecEnd;
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index fe794c7740..5f54c2816b 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -71,12 +71,12 @@ typedef uint16 RepOriginId;
  *
  * Note that we define our own O_DSYNC on Windows, but not O_SYNC.
  */
-#if defined(PLATFORM_DEFAULT_SYNC_METHOD)
-#define DEFAULT_SYNC_METHOD		PLATFORM_DEFAULT_SYNC_METHOD
+#if defined(PLATFORM_DEFAULT_WAL_SYNC_METHOD)
+#define DEFAULT_WAL_SYNC_METHOD		PLATFORM_DEFAULT_WAL_SYNC_METHOD
 #elif defined(O_DSYNC) && (!defined(O_SYNC) || O_DSYNC != O_SYNC)
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_OPEN_DSYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_OPEN_DSYNC
 #else
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_FDATASYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_FDATASYNC
 #endif
 
 #endif							/* XLOG_DEFS_H */
diff --git a/src/include/port/freebsd.h b/src/include/port/freebsd.h
index 0e3fde55d6..c604187acd 100644
--- a/src/include/port/freebsd.h
+++ b/src/include/port/freebsd.h
@@ -5,4 +5,4 @@
  * would prefer open_datasync on FreeBSD 13+, but that is not a good choice on
  * many systems.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
diff --git a/src/include/port/linux.h b/src/include/port/linux.h
index 7a6e46cdbb..8101af2b93 100644
--- a/src/include/port/linux.h
+++ b/src/include/port/linux.h
@@ -19,4 +19,4 @@
  * perform better and (b) causes outright failures on ext4 data=journal
  * filesystems, because those don't support O_DIRECT.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index f04b99e3b9..2a191830a8 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -159,6 +159,6 @@ extern bool check_wal_consistency_checking(char **newval, void **extra,
 										   GucSource source);
 extern void assign_wal_consistency_checking(const char *newval, void *extra);
 extern bool check_wal_segment_size(int *newval, void **extra, GucSource source);
-extern void assign_xlog_sync_method(int new_sync_method, void *extra);
+extern void assign_wal_sync_method(int new_wal_sync_method, void *extra);
 
 #endif							/* GUC_HOOKS_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8de90c4958..e69bb671bf 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3016,6 +3016,7 @@ WalSnd
 WalSndCtlData
 WalSndSendDataCallback
 WalSndState
+WalSyncMethod
 WalTimeSample
 WalUsage
 WalWriteMethod
-- 
2.25.1

#48Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#47)
1 attachment(s)
Re: should frontend tools use syncfs() ?

On Mon, Oct 09, 2023 at 11:14:39AM -0500, Nathan Bossart wrote:

Thanks. I've made a couple of small changes, but otherwise I think this
one is just about ready.

I forgot to rename one thing. Here's a v13 with that fixed.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Attachments:

v13-0001-Align-names-of-variables-etc.-in-wal_sync_method.patchtext/x-diff; charset=us-asciiDownload
From b1829fea8f81cceceabd37585ab74a290505d0d5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Mon, 9 Oct 2023 11:07:29 -0500
Subject: [PATCH v13 1/1] Align names of variables, etc. in
 wal_sync_method-related code.

Author: Maxim Orlov
Discussion: https://postgr.es/m/CACG%3DezbL1gwE7_K7sr9uqaCGkWhmvRTcTEnm3%2BX1xsRNwbXULQ%40mail.gmail.com
---
 src/backend/access/transam/xlog.c   | 58 ++++++++++++++---------------
 src/backend/storage/file/fd.c       |  4 +-
 src/backend/utils/misc/guc_tables.c |  8 ++--
 src/include/access/xlog.h           | 15 +++++---
 src/include/access/xlogdefs.h       |  8 ++--
 src/include/port/freebsd.h          |  2 +-
 src/include/port/linux.h            |  2 +-
 src/include/utils/guc_hooks.h       |  2 +-
 src/tools/pgindent/typedefs.list    |  1 +
 9 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fcbde10529..a0acd67772 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -130,7 +130,7 @@ bool	   *wal_consistency_checking = NULL;
 bool		wal_init_zero = true;
 bool		wal_recycle = true;
 bool		log_checkpoints = true;
-int			sync_method = DEFAULT_SYNC_METHOD;
+int			wal_sync_method = DEFAULT_WAL_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_REPLICA;
 int			CommitDelay = 0;	/* precommit delay in microseconds */
 int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
@@ -171,17 +171,17 @@ static bool check_wal_consistency_checking_deferred = false;
 /*
  * GUC support
  */
-const struct config_enum_entry sync_method_options[] = {
-	{"fsync", SYNC_METHOD_FSYNC, false},
+const struct config_enum_entry wal_sync_method_options[] = {
+	{"fsync", WAL_SYNC_METHOD_FSYNC, false},
 #ifdef HAVE_FSYNC_WRITETHROUGH
-	{"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
+	{"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
 #endif
-	{"fdatasync", SYNC_METHOD_FDATASYNC, false},
+	{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
 #ifdef O_SYNC
-	{"open_sync", SYNC_METHOD_OPEN, false},
+	{"open_sync", WAL_SYNC_METHOD_OPEN, false},
 #endif
 #ifdef O_DSYNC
-	{"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
+	{"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
 #endif
 	{NULL, 0, false}
 };
@@ -2328,8 +2328,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		 * have no open file or the wrong one.  However, we do not need to
 		 * fsync more than one file.
 		 */
-		if (sync_method != SYNC_METHOD_OPEN &&
-			sync_method != SYNC_METHOD_OPEN_DSYNC)
+		if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
+			wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
 		{
 			if (openLogFile >= 0 &&
 				!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
@@ -2959,7 +2959,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	 */
 	*added = false;
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 	{
 		if (errno != ENOENT)
@@ -3124,7 +3124,7 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
 
 	/* Now open original target segment (might not be file I just made) */
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -3356,7 +3356,7 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
 	XLogFilePath(path, tli, segno, wal_segment_size);
 
 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
-					   get_sync_bit(sync_method));
+					   get_sync_bit(wal_sync_method));
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
@@ -8118,16 +8118,16 @@ get_sync_bit(int method)
 			 * not included in the enum option array, and therefore will never
 			 * be seen here.
 			 */
-		case SYNC_METHOD_FSYNC:
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			return o_direct_flag;
 #ifdef O_SYNC
-		case SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN:
 			return O_SYNC | o_direct_flag;
 #endif
 #ifdef O_DSYNC
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			return O_DSYNC | o_direct_flag;
 #endif
 		default:
@@ -8141,9 +8141,9 @@ get_sync_bit(int method)
  * GUC support
  */
 void
-assign_xlog_sync_method(int new_sync_method, void *extra)
+assign_wal_sync_method(int new_wal_sync_method, void *extra)
 {
-	if (sync_method != new_sync_method)
+	if (wal_sync_method != new_wal_sync_method)
 	{
 		/*
 		 * To ensure that no blocks escape unsynced, force an fsync on the
@@ -8169,7 +8169,7 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 			}
 
 			pgstat_report_wait_end();
-			if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
+			if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
 				XLogFileClose();
 		}
 	}
@@ -8195,8 +8195,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	 * file.
 	 */
 	if (!enableFsync ||
-		sync_method == SYNC_METHOD_OPEN ||
-		sync_method == SYNC_METHOD_OPEN_DSYNC)
+		wal_sync_method == WAL_SYNC_METHOD_OPEN ||
+		wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
 		return;
 
 	/* Measure I/O timing to sync the WAL file */
@@ -8206,29 +8206,29 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		INSTR_TIME_SET_ZERO(start);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
-	switch (sync_method)
+	switch (wal_sync_method)
 	{
-		case SYNC_METHOD_FSYNC:
+		case WAL_SYNC_METHOD_FSYNC:
 			if (pg_fsync_no_writethrough(fd) != 0)
 				msg = _("could not fsync file \"%s\": %m");
 			break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
-		case SYNC_METHOD_FSYNC_WRITETHROUGH:
+		case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
 			if (pg_fsync_writethrough(fd) != 0)
 				msg = _("could not fsync write-through file \"%s\": %m");
 			break;
 #endif
-		case SYNC_METHOD_FDATASYNC:
+		case WAL_SYNC_METHOD_FDATASYNC:
 			if (pg_fdatasync(fd) != 0)
 				msg = _("could not fdatasync file \"%s\": %m");
 			break;
-		case SYNC_METHOD_OPEN:
-		case SYNC_METHOD_OPEN_DSYNC:
+		case WAL_SYNC_METHOD_OPEN:
+		case WAL_SYNC_METHOD_OPEN_DSYNC:
 			/* not reachable */
 			Assert(false);
 			break;
 		default:
-			elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
+			elog(PANIC, "unrecognized wal_sync_method: %d", wal_sync_method);
 			break;
 	}
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 3fed475c38..b7607aa1be 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -398,9 +398,9 @@ pg_fsync(int fd)
 	errno = 0;
 #endif
 
-	/* #if is to skip the sync_method test if there's no need for it */
+	/* #if is to skip the wal_sync_method test if there's no need for it */
 #if defined(HAVE_FSYNC_WRITETHROUGH)
-	if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
+	if (wal_sync_method == WAL_SYNC_METHOD_FSYNC_WRITETHROUGH)
 		return pg_fsync_writethrough(fd);
 	else
 #endif
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 16ec6c5ef0..4c58574166 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -485,7 +485,7 @@ static const struct config_enum_entry wal_compression_options[] = {
 extern const struct config_enum_entry wal_level_options[];
 extern const struct config_enum_entry archive_mode_options[];
 extern const struct config_enum_entry recovery_target_action_options[];
-extern const struct config_enum_entry sync_method_options[];
+extern const struct config_enum_entry wal_sync_method_options[];
 extern const struct config_enum_entry dynamic_shared_memory_options[];
 
 /*
@@ -4843,9 +4843,9 @@ struct config_enum ConfigureNamesEnum[] =
 			gettext_noop("Selects the method used for forcing WAL updates to disk."),
 			NULL
 		},
-		&sync_method,
-		DEFAULT_SYNC_METHOD, sync_method_options,
-		NULL, assign_xlog_sync_method, NULL
+		&wal_sync_method,
+		DEFAULT_WAL_SYNC_METHOD, wal_sync_method_options,
+		NULL, assign_wal_sync_method, NULL
 	},
 
 	{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca852381..4ad572cb87 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -19,12 +19,15 @@
 
 
 /* Sync methods */
-#define SYNC_METHOD_FSYNC		0
-#define SYNC_METHOD_FDATASYNC	1
-#define SYNC_METHOD_OPEN		2	/* for O_SYNC */
-#define SYNC_METHOD_FSYNC_WRITETHROUGH	3
-#define SYNC_METHOD_OPEN_DSYNC	4	/* for O_DSYNC */
-extern PGDLLIMPORT int sync_method;
+typedef enum WalSyncMethod
+{
+	WAL_SYNC_METHOD_FSYNC = 0,
+	WAL_SYNC_METHOD_FDATASYNC,
+	WAL_SYNC_METHOD_OPEN,		/* for O_SYNC */
+	WAL_SYNC_METHOD_FSYNC_WRITETHROUGH,
+	WAL_SYNC_METHOD_OPEN_DSYNC	/* for O_DSYNC */
+} WalSyncMethod;
+extern PGDLLIMPORT int wal_sync_method;
 
 extern PGDLLIMPORT XLogRecPtr ProcLastRecPtr;
 extern PGDLLIMPORT XLogRecPtr XactLastRecEnd;
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index fe794c7740..5f54c2816b 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -71,12 +71,12 @@ typedef uint16 RepOriginId;
  *
  * Note that we define our own O_DSYNC on Windows, but not O_SYNC.
  */
-#if defined(PLATFORM_DEFAULT_SYNC_METHOD)
-#define DEFAULT_SYNC_METHOD		PLATFORM_DEFAULT_SYNC_METHOD
+#if defined(PLATFORM_DEFAULT_WAL_SYNC_METHOD)
+#define DEFAULT_WAL_SYNC_METHOD		PLATFORM_DEFAULT_WAL_SYNC_METHOD
 #elif defined(O_DSYNC) && (!defined(O_SYNC) || O_DSYNC != O_SYNC)
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_OPEN_DSYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_OPEN_DSYNC
 #else
-#define DEFAULT_SYNC_METHOD		SYNC_METHOD_FDATASYNC
+#define DEFAULT_WAL_SYNC_METHOD		WAL_SYNC_METHOD_FDATASYNC
 #endif
 
 #endif							/* XLOG_DEFS_H */
diff --git a/src/include/port/freebsd.h b/src/include/port/freebsd.h
index 0e3fde55d6..c604187acd 100644
--- a/src/include/port/freebsd.h
+++ b/src/include/port/freebsd.h
@@ -5,4 +5,4 @@
  * would prefer open_datasync on FreeBSD 13+, but that is not a good choice on
  * many systems.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
diff --git a/src/include/port/linux.h b/src/include/port/linux.h
index 7a6e46cdbb..8101af2b93 100644
--- a/src/include/port/linux.h
+++ b/src/include/port/linux.h
@@ -19,4 +19,4 @@
  * perform better and (b) causes outright failures on ext4 data=journal
  * filesystems, because those don't support O_DIRECT.
  */
-#define PLATFORM_DEFAULT_SYNC_METHOD	SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_WAL_SYNC_METHOD	WAL_SYNC_METHOD_FDATASYNC
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index f04b99e3b9..2a191830a8 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -159,6 +159,6 @@ extern bool check_wal_consistency_checking(char **newval, void **extra,
 										   GucSource source);
 extern void assign_wal_consistency_checking(const char *newval, void *extra);
 extern bool check_wal_segment_size(int *newval, void **extra, GucSource source);
-extern void assign_xlog_sync_method(int new_sync_method, void *extra);
+extern void assign_wal_sync_method(int new_wal_sync_method, void *extra);
 
 #endif							/* GUC_HOOKS_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8de90c4958..e69bb671bf 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3016,6 +3016,7 @@ WalSnd
 WalSndCtlData
 WalSndSendDataCallback
 WalSndState
+WalSyncMethod
 WalTimeSample
 WalUsage
 WalWriteMethod
-- 
2.25.1

#49Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#43)
Re: should frontend tools use syncfs() ?

On Wed, Oct 04, 2023 at 10:29:07AM -0500, Nathan Bossart wrote:

On Wed, Sep 27, 2023 at 01:56:08PM +0100, Peter Eisentraut wrote:

We have a collection of platform-specific notes in chapter 19, including
file-system-related notes in section 19.2. Maybe it could be put there?

I will give this a try.

I started on this, but I couldn't shake the feeling that this wasn't the
right place for these notes. This chapter is about setting up a server,
and the syncfs() notes do apply to the recovery_init_sync_method
configuration parameter, but it also applies to a number of server/client
applications.

I've been looking around, and I haven't found a great place to move this
section to. IMO some of the other appendices have similar amounts of
information (e.g., Date/Time Support, The Source Code Repository, Color
Support), so maybe a dedicated appendix isn't too extreme. Another option
could be to introduce a new section for platform-specific notes, but that
would just make this section even larger for now.

Thoughts?

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#50Nathan Bossart
nathandbossart@gmail.com
In reply to: Nathan Bossart (#48)
Re: should frontend tools use syncfs() ?

On Mon, Oct 09, 2023 at 02:34:27PM -0500, Nathan Bossart wrote:

On Mon, Oct 09, 2023 at 11:14:39AM -0500, Nathan Bossart wrote:

Thanks. I've made a couple of small changes, but otherwise I think this
one is just about ready.

I forgot to rename one thing. Here's a v13 with that fixed.

Committed.

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com