From 56882f910e58f72987433c72c32fc6eedc1cbac9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 5 Feb 2022 09:15:47 +1300
Subject: [PATCH v3 2/2] Add wal_sync_method=fdatasync for Windows.

Windows 10 gained support for flushing files with fdatasync() semantics.
The main advantage over wal_sync_method=open_datasync is that the latter
does not flush SATA drive caches.

According to OS documentation FLUSH_FLAGS_FILE_DATA_SYNC_ONLY works
only on NTFS filesystems, so we can't consider making it the default.

Discussion: https://postgr.es/m/CA%2BhUKGJZJVO%3DiX%2Beb-PXi2_XS9ZRqnn_4URh0NUQOwt6-_51xQ%40mail.gmail.com
---
 configure                     |  6 ++++
 configure.ac                  |  1 +
 doc/src/sgml/wal.sgml         |  3 +-
 src/include/c.h               |  2 +-
 src/include/port/win32_port.h |  6 ++++
 src/include/port/win32ntdll.h | 10 ++++++-
 src/port/fdatasync.c          | 53 +++++++++++++++++++++++++++++++++++
 src/port/win32ntdll.c         |  6 +++-
 src/tools/msvc/Mkvcbuild.pm   |  3 +-
 src/tools/msvc/Solution.pm    |  2 +-
 10 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 src/port/fdatasync.c

diff --git a/configure b/configure
index 1e63c6862b..a03339301d 100755
--- a/configure
+++ b/configure
@@ -16980,6 +16980,12 @@ fi
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" fdatasync.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS fdatasync.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" kill.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS kill.$ac_objext"
diff --git a/configure.ac b/configure.ac
index 71191f14ad..9528f7fb42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1975,6 +1975,7 @@ if test "$PORTNAME" = "win32"; then
   AC_CHECK_FUNCS(_configthreadlocale)
   AC_REPLACE_FUNCS(gettimeofday)
   AC_LIBOBJ(dirmod)
+  AC_LIBOBJ(fdatasync)
   AC_LIBOBJ(kill)
   AC_LIBOBJ(open)
   AC_LIBOBJ(system)
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 4b6ef283c1..01f7379ebb 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -108,7 +108,8 @@
         <literal>open_datasync</literal> (the default), write caching can be disabled
         by unchecking <literal>My Computer\Open\<replaceable>disk drive</replaceable>\Properties\Hardware\Properties\Policies\Enable write caching on the disk</literal>.
         Alternatively, set <varname>wal_sync_method</varname> to
-        <literal>fsync</literal> or <literal>fsync_writethrough</literal>, which prevent
+        <literal>fdatasync</literal> (NTFS only), <literal>fsync</literal> or
+        <literal>fsync_writethrough</literal>, which prevent
         write caching.
       </para>
     </listitem>
diff --git a/src/include/c.h b/src/include/c.h
index 863a16c6a6..fdf89e2742 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -1290,7 +1290,7 @@ typedef union PGAlignedXLogBlock
  * standard C library.
  */
 
-#if defined(HAVE_FDATASYNC) && !HAVE_DECL_FDATASYNC
+#if !HAVE_DECL_FDATASYNC
 extern int	fdatasync(int fildes);
 #endif
 
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 5121c0c626..5ea66528fa 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -83,6 +83,12 @@
 #define HAVE_FSYNC_WRITETHROUGH
 #define FSYNC_WRITETHROUGH_IS_FSYNC
 
+/*
+ * We have a replacement for fdatasync() in src/port/fdatasync.c, which is
+ * unconditionally used by MSVC and Mingw builds.
+ */
+#define HAVE_FDATASYNC
+
 #define USES_WINSOCK
 
 /*
diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h
index 291b067ea4..34cebddd54 100644
--- a/src/include/port/win32ntdll.h
+++ b/src/include/port/win32ntdll.h
@@ -23,9 +23,17 @@
 #include <ntstatus.h>
 #include <winternl.h>
 
-typedef NTSTATUS (__stdcall * RtlGetLastNtStatus_t) (void);
+#ifndef FLUSH_FLAGS_FILE_DATA_SYNC_ONLY
+#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x4
+#endif
+
+typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t) (void);
+typedef ULONG (__stdcall *RtlNtStatusToDosError_t) (NTSTATUS);
+typedef NTSTATUS (__stdcall *NtFlushBuffersFileEx_t) (HANDLE, ULONG, PVOID, ULONG, PIO_STATUS_BLOCK);
 
 extern PGDLLIMPORT RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+extern PGDLLIMPORT RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+extern PGDLLIMPORT NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
 
 extern int	initialize_ntdll(void);
 
diff --git a/src/port/fdatasync.c b/src/port/fdatasync.c
new file mode 100644
index 0000000000..afef853aa3
--- /dev/null
+++ b/src/port/fdatasync.c
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * fdatasync.c
+ *	   Win32 fdatasync() replacement
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ *
+ * src/port/fdatasync.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define UMDF_USING_NTSTATUS
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#include "port/win32ntdll.h"
+
+int
+fdatasync(int fd)
+{
+	IO_STATUS_BLOCK iosb;
+	NTSTATUS	status;
+	HANDLE		handle;
+
+	handle = (HANDLE) _get_osfhandle(fd);
+	if (handle == INVALID_HANDLE_VALUE)
+	{
+		errno = EBADF;
+		return -1;
+	}
+
+	if (initialize_ntdll() < 0)
+		return -1;
+
+	memset(&iosb, 0, sizeof(iosb));
+	status = pg_NtFlushBuffersFileEx(handle,
+									 FLUSH_FLAGS_FILE_DATA_SYNC_ONLY,
+									 NULL,
+									 0,
+									 &iosb);
+
+	if (NT_SUCCESS(status))
+		return 0;
+
+	_dosmaperr(pg_RtlNtStatusToDosError(status));
+	return -1;
+}
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
index 10c33c6a01..eb61407754 100644
--- a/src/port/win32ntdll.c
+++ b/src/port/win32ntdll.c
@@ -20,6 +20,8 @@
 #include "port/win32ntdll.h"
 
 RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
 
 typedef struct NtDllRoutine
 {
@@ -28,7 +30,9 @@ typedef struct NtDllRoutine
 } NtDllRoutine;
 
 static const NtDllRoutine routines[] = {
-	{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus}
+	{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus},
+	{"RtlNtStatusToDosError", (pg_funcptr_t *) &pg_RtlNtStatusToDosError},
+	{"NtFlushBuffersFileEx", (pg_funcptr_t *) &pg_NtFlushBuffersFileEx}
 };
 
 static bool initialized;
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index e4feda10fd..cc7a908d10 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -99,7 +99,8 @@ sub mkvcbuild
 	$solution = CreateSolution($vsVersion, $config);
 
 	our @pgportfiles = qw(
-	  chklocale.c explicit_bzero.c fls.c getpeereid.c getrusage.c inet_aton.c
+	  chklocale.c explicit_bzero.c fls.c fdatasync.c
+	  getpeereid.c getrusage.c inet_aton.c
 	  getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
 	  snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
 	  dirent.c dlopen.c getopt.c getopt_long.c link.c
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index fa32dc371d..c87bca8e6a 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -257,7 +257,7 @@ sub GenerateFiles
 		HAVE_EDITLINE_READLINE_H                    => undef,
 		HAVE_EXECINFO_H                             => undef,
 		HAVE_EXPLICIT_BZERO                         => undef,
-		HAVE_FDATASYNC                              => undef,
+		HAVE_FDATASYNC                              => 1,
 		HAVE_FLS                                    => undef,
 		HAVE_FSEEKO                                 => 1,
 		HAVE_FUNCNAME__FUNC                         => undef,
-- 
2.35.1

