From e6692d744a7d041519e9c0998ce9f34aabc63c1e Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 13 Dec 2022 16:54:18 +1300
Subject: [PATCH v2 3/4] Add io_direct setting (developer-only).

Provide a way to ask the kernel to use O_DIRECT (or local equivalent)
for data and WAL files.  This hurts performance currently and is not
intended for end-users yet.  Later proposed work would introduce our own
I/O clustering, read-ahead, etc to replace the kernel features that are
disabled with this option.

This replaces the previous logic that would use O_DIRECT for the WAL in
limited and obscure cases, now that there is an explicit setting.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Discussion: https://postgr.es/m/CA%2BhUKGK1X532hYqJ_MzFWt0n1zt8trz980D79WbjwnT-yYLZpg%40mail.gmail.com
---
 doc/src/sgml/config.sgml                      | 33 +++++++
 src/backend/access/transam/xlog.c             | 37 ++++----
 src/backend/access/transam/xlogprefetcher.c   |  2 +-
 src/backend/storage/buffer/bufmgr.c           | 16 ++--
 src/backend/storage/buffer/localbuf.c         |  7 +-
 src/backend/storage/file/fd.c                 | 88 +++++++++++++++++++
 src/backend/storage/smgr/md.c                 | 24 +++--
 src/backend/storage/smgr/smgr.c               |  1 +
 src/backend/utils/misc/guc_tables.c           | 12 +++
 src/include/storage/fd.h                      |  8 ++
 src/include/storage/smgr.h                    |  1 +
 src/include/utils/guc_hooks.h                 |  3 +
 src/test/modules/test_misc/meson.build        |  1 +
 src/test/modules/test_misc/t/004_io_direct.pl | 40 +++++++++
 14 files changed, 239 insertions(+), 34 deletions(-)
 create mode 100644 src/test/modules/test_misc/t/004_io_direct.pl

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8e4145979d..766d20f2ea 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -11033,6 +11033,39 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-io-direct" xreflabel="io_direct">
+      <term><varname>io_direct</varname> (<type>string</type>)
+      <indexterm>
+        <primary><varname>io_direct</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Ask the kernel to minimize caching effects for relation data and WAL
+        files using <literal>O_DIRECT</literal> (most Unix-like systems),
+        <literal>F_NOCACHE</literal> (macOS) or
+        <literal>FILE_FLAG_NO_BUFFERING</literal> (Windows).
+       </para>
+       <para>
+        May be set to an empty string (the default) to disable use of direct
+        I/O, or a comma-separated list of types of files for which direct I/O
+        is enabled.  The valid types of file are <literal>data</literal> for
+        main data files, <literal>wal</literal> for WAL files, and
+        <literal>wal_init</literal> for WAL files when being initially
+        allocated.
+       </para>
+       <para>
+        Some operating systems and file systems do not support direct I/O, so
+        non-default settings may be rejected at startup, or produce I/O errors
+        at runtime.
+       </para>
+       <para>
+        Currently this feature reduces performance, and is intended for
+        developer testing only.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-post-auth-delay" xreflabel="post_auth_delay">
       <term><varname>post_auth_delay</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a75c6813a4..08a2f7a558 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2925,6 +2925,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	XLogSegNo	max_segno;
 	int			fd;
 	int			save_errno;
+	int			open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
 
 	Assert(logtli != 0);
 
@@ -2957,8 +2958,11 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 
 	unlink(tmppath);
 
+	if (io_direct_flags & IO_DIRECT_WAL_INIT)
+		open_flags |= PG_O_DIRECT;
+
 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
-	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
+	fd = BasicOpenFile(tmppath, open_flags);
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -3350,7 +3354,7 @@ XLogFileClose(void)
 	 * use the cache to read the WAL segment.
 	 */
 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
-	if (!XLogIsNeeded())
+	if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
 		(void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
 #endif
 
@@ -4450,7 +4454,6 @@ show_in_hot_standby(void)
 	return RecoveryInProgress() ? "on" : "off";
 }
 
-
 /*
  * Read the control file, set respective GUCs.
  *
@@ -8034,35 +8037,27 @@ xlog_redo(XLogReaderState *record)
 }
 
 /*
- * Return the (possible) sync flag used for opening a file, depending on the
- * value of the GUC wal_sync_method.
+ * Return the extra open flags used for opening a file, depending on the
+ * value of the GUCs wal_sync_method, fsync and io_direct.
  */
 static int
 get_sync_bit(int method)
 {
 	int			o_direct_flag = 0;
 
-	/* If fsync is disabled, never open in sync mode */
-	if (!enableFsync)
-		return 0;
-
 	/*
-	 * Optimize writes by bypassing kernel cache with O_DIRECT when using
-	 * O_SYNC and O_DSYNC.  But only if archiving and streaming are disabled,
-	 * otherwise the archive command or walsender process will read the WAL
-	 * soon after writing it, which is guaranteed to cause a physical read if
-	 * we bypassed the kernel cache. We also skip the
-	 * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
-	 * reason.
-	 *
-	 * Never use O_DIRECT in walreceiver process for similar reasons; the WAL
+	 * Use O_DIRECT if requested, except in walreceiver process.  The WAL
 	 * written by walreceiver is normally read by the startup process soon
-	 * after it's written. Also, walreceiver performs unaligned writes, which
+	 * after it's written.  Also, walreceiver performs unaligned writes, which
 	 * don't work with O_DIRECT, so it is required for correctness too.
 	 */
-	if (!XLogIsNeeded() && !AmWalReceiverProcess())
+	if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
 		o_direct_flag = PG_O_DIRECT;
 
+	/* If fsync is disabled, never open in sync mode */
+	if (!enableFsync)
+		return o_direct_flag;
+
 	switch (method)
 	{
 			/*
@@ -8074,7 +8069,7 @@ get_sync_bit(int method)
 		case SYNC_METHOD_FSYNC:
 		case SYNC_METHOD_FSYNC_WRITETHROUGH:
 		case SYNC_METHOD_FDATASYNC:
-			return 0;
+			return o_direct_flag;
 #ifdef O_SYNC
 		case SYNC_METHOD_OPEN:
 			return O_SYNC | o_direct_flag;
diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c
index 0cf03945ee..992256dd09 100644
--- a/src/backend/access/transam/xlogprefetcher.c
+++ b/src/backend/access/transam/xlogprefetcher.c
@@ -785,7 +785,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 				block->prefetch_buffer = InvalidBuffer;
 				return LRQ_NEXT_IO;
 			}
-			else
+			else if ((io_direct_flags & IO_DIRECT_DATA) == 0)
 			{
 				/*
 				 * This shouldn't be possible, because we already determined
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index aba07e94c9..11c8187a55 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -535,8 +535,11 @@ PrefetchSharedBuffer(SMgrRelation smgr_reln,
 		 * Try to initiate an asynchronous read.  This returns false in
 		 * recovery if the relation file doesn't exist.
 		 */
-		if (smgrprefetch(smgr_reln, forkNum, blockNum))
+		if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
+			smgrprefetch(smgr_reln, forkNum, blockNum))
+		{
 			result.initiated_io = true;
+		}
 #endif							/* USE_PREFETCH */
 	}
 	else
@@ -582,11 +585,11 @@ PrefetchSharedBuffer(SMgrRelation smgr_reln,
  * the kernel and therefore didn't really initiate I/O, and no way to know when
  * the I/O completes other than using synchronous ReadBuffer().
  *
- * 3.  Otherwise, the buffer wasn't already cached by PostgreSQL, and either
+ * 3.  Otherwise, the buffer wasn't already cached by PostgreSQL, and
  * USE_PREFETCH is not defined (this build doesn't support prefetching due to
- * lack of a kernel facility), or the underlying relation file wasn't found and
- * we are in recovery.  (If the relation file wasn't found and we are not in
- * recovery, an error is raised).
+ * lack of a kernel facility), direct I/O is enabled, or the underlying
+ * relation file wasn't found and we are in recovery.  (If the relation file
+ * wasn't found and we are not in recovery, an error is raised).
  */
 PrefetchBufferResult
 PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
@@ -4908,6 +4911,9 @@ ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
 {
 	PendingWriteback *pending;
 
+	if (io_direct_flags & IO_DIRECT_DATA)
+		return;
+
 	/*
 	 * Add buffer to the pending writeback array, unless writeback control is
 	 * disabled.
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 735f7c6018..b01e319641 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -87,8 +87,11 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
 	{
 #ifdef USE_PREFETCH
 		/* Not in buffers, so initiate prefetch */
-		smgrprefetch(smgr, forkNum, blockNum);
-		result.initiated_io = true;
+		if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
+			smgrprefetch(smgr, forkNum, blockNum))
+		{
+			result.initiated_io = true;
+		}
 #endif							/* USE_PREFETCH */
 	}
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f6c9382023..0829e9b8df 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -98,7 +98,9 @@
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "utils/guc.h"
+#include "utils/guc_hooks.h"
 #include "utils/resowner_private.h"
+#include "utils/varlena.h"
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -162,6 +164,9 @@ bool		data_sync_retry = false;
 /* How SyncDataDirectory() should do its job. */
 int			recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
 
+/* Which kinds of files should be opened with PG_O_DIRECT. */
+int			io_direct_flags;
+
 /* Debugging.... */
 
 #ifdef FDDEBUG
@@ -2021,6 +2026,11 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 	if (nbytes <= 0)
 		return;
 
+#ifdef PG_O_DIRECT
+	if (VfdCache[file].fileFlags & PG_O_DIRECT)
+		return;
+#endif
+
 	returnCode = FileAccess(file);
 	if (returnCode < 0)
 		return;
@@ -3737,3 +3747,81 @@ data_sync_elevel(int elevel)
 {
 	return data_sync_retry ? elevel : PANIC;
 }
+
+bool
+check_io_direct(char **newval, void **extra, GucSource source)
+{
+#if PG_O_DIRECT == 0
+	if (*newval)
+	{
+		GUC_check_errdetail("io_direct is not supported on this platform.");
+		return false;
+	}
+#else
+	List	   *list;
+	ListCell   *l;
+	int		   *flags;
+
+	if (!SplitGUCList(*newval, ',', &list))
+	{
+		GUC_check_errdetail("invalid list syntax in parameter \"%s\"",
+							"io_direct");
+		return false;
+	}
+
+	flags = guc_malloc(ERROR, sizeof(*flags));
+	*flags = 0;
+	foreach (l, list)
+	{
+		char	   *item = (char *) lfirst(l);
+
+		if (pg_strcasecmp(item, "data") == 0)
+			*flags |= IO_DIRECT_DATA;
+		else if (pg_strcasecmp(item, "wal") == 0)
+			*flags |= IO_DIRECT_WAL;
+		else if (pg_strcasecmp(item, "wal_init") == 0)
+			*flags |= IO_DIRECT_WAL_INIT;
+		else
+		{
+			GUC_check_errdetail("invalid option \"%s\"", item);
+			return false;
+		}
+	}
+
+	*extra = flags;
+
+	return true;
+#endif
+}
+
+extern void
+assign_io_direct(const char *newval, void *extra)
+{
+	int	   *flags = (int *) extra;
+
+	io_direct_flags = *flags;
+}
+
+extern const char *
+show_io_direct(void)
+{
+	static char result[80];
+
+	result[0] = 0;
+	if (io_direct_flags & IO_DIRECT_DATA)
+		strcat(result, "data");
+	if (io_direct_flags & IO_DIRECT_WAL)
+	{
+		if (result[0])
+			strcat(result, ", ");
+		strcat(result, "wal");
+	}
+	if (io_direct_flags & IO_DIRECT_WAL_INIT)
+	{
+		if (result[0])
+			strcat(result, ", ");
+		strcat(result, "wal_init");
+	}
+
+	return result;
+}
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 3e034afdf1..38263f3d0f 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -142,6 +142,16 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum,
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 							  MdfdVec *seg);
 
+static inline int
+_mdfd_open_flags(ForkNumber forkNum)
+{
+	int		flags = O_RDWR | PG_BINARY;
+
+	if (io_direct_flags & IO_DIRECT_DATA)
+		flags |= PG_O_DIRECT;
+
+	return flags;
+}
 
 /*
  *	mdinit() -- Initialize private state for magnetic disk storage manager.
@@ -205,14 +215,14 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
 	path = relpath(reln->smgr_rlocator, forknum);
 
-	fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
+	fd = PathNameOpenFile(path, _mdfd_open_flags(forknum) | O_CREAT | O_EXCL);
 
 	if (fd < 0)
 	{
 		int			save_errno = errno;
 
 		if (isRedo)
-			fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
+			fd = PathNameOpenFile(path, _mdfd_open_flags(forknum));
 		if (fd < 0)
 		{
 			/* be sure to report the error reported by create, not open */
@@ -527,7 +537,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
 
 	path = relpath(reln->smgr_rlocator, forknum);
 
-	fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
+	fd = PathNameOpenFile(path, _mdfd_open_flags(forknum));
 
 	if (fd < 0)
 	{
@@ -598,6 +608,8 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 	off_t		seekpos;
 	MdfdVec    *v;
 
+	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
+
 	v = _mdfd_getseg(reln, forknum, blocknum, false,
 					 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
 	if (v == NULL)
@@ -623,6 +635,8 @@ void
 mdwriteback(SMgrRelation reln, ForkNumber forknum,
 			BlockNumber blocknum, BlockNumber nblocks)
 {
+	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
+
 	/*
 	 * Issue flush requests in as few requests as possible; have to split at
 	 * segment boundaries though, since those are actually separate files.
@@ -1200,7 +1214,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
 	fullpath = _mdfd_segpath(reln, forknum, segno);
 
 	/* open the file */
-	fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags);
+	fd = PathNameOpenFile(fullpath, _mdfd_open_flags(forknum) | oflags);
 
 	pfree(fullpath);
 
@@ -1410,7 +1424,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 		strlcpy(path, p, MAXPGPATH);
 		pfree(p);
 
-		file = PathNameOpenFile(path, O_RDWR | PG_BINARY);
+		file = PathNameOpenFile(path, _mdfd_open_flags(ftag->forknum));
 		if (file < 0)
 			return -1;
 		need_to_close = true;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index c1a5febcbf..4892920812 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -20,6 +20,7 @@
 #include "access/xlogutils.h"
 #include "lib/ilist.h"
 #include "storage/bufmgr.h"
+#include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/md.h"
 #include "storage/smgr.h"
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 1bf14eec66..1de30ebbf1 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -543,6 +543,7 @@ static char *locale_ctype;
 static char *server_encoding_string;
 static char *server_version_string;
 static int	server_version_num;
+static char *io_direct_string;
 
 #ifdef HAVE_SYSLOG
 #define	DEFAULT_SYSLOG_FACILITY LOG_LOCAL0
@@ -4468,6 +4469,17 @@ struct config_string ConfigureNamesString[] =
 		check_backtrace_functions, assign_backtrace_functions, NULL
 	},
 
+	{
+		{"io_direct", PGC_POSTMASTER, DEVELOPER_OPTIONS,
+			gettext_noop("Use direct I/O for file access."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&io_direct_string,
+		"",
+		check_io_direct, assign_io_direct, show_io_direct
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 85ef12c440..0d65cb3c80 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -44,6 +44,8 @@
 #define FD_H
 
 #include <dirent.h>
+#include <fcntl.h>
+
 
 typedef enum RecoveryInitSyncMethod
 {
@@ -54,10 +56,16 @@ typedef enum RecoveryInitSyncMethod
 typedef int File;
 
 
+#define IO_DIRECT_DATA			0x01
+#define IO_DIRECT_WAL			0x02
+#define IO_DIRECT_WAL_INIT		0x04
+
+
 /* GUC parameter */
 extern PGDLLIMPORT int max_files_per_process;
 extern PGDLLIMPORT bool data_sync_retry;
 extern PGDLLIMPORT int recovery_init_sync_method;
+extern PGDLLIMPORT int io_direct_flags;
 
 /*
  * This is private to fd.c, but exported for save/restore_backend_variables()
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a07715356b..ea7b3ff8dd 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -17,6 +17,7 @@
 #include "lib/ilist.h"
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
+#include "utils/guc.h"
 
 /*
  * smgr.c maintains a table of SMgrRelation objects, which are essentially
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index f1a9a183b4..61a7fd77b8 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -154,5 +154,8 @@ extern bool check_wal_consistency_checking(char **newval, void **extra,
 										   GucSource source);
 extern void assign_wal_consistency_checking(const char *newval, void *extra);
 extern void assign_xlog_sync_method(int new_sync_method, void *extra);
+extern bool check_io_direct(char **newval, void **extra, GucSource source);
+extern void assign_io_direct(const char *newval, void *extra);
+extern const char *show_io_direct(void);
 
 #endif							/* GUC_HOOKS_H */
diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build
index cfc830ff39..97162d2b8f 100644
--- a/src/test/modules/test_misc/meson.build
+++ b/src/test/modules/test_misc/meson.build
@@ -7,6 +7,7 @@ tests += {
       't/001_constraint_validation.pl',
       't/002_tablespace.pl',
       't/003_check_guc.pl',
+      't/004_io_direct.pl',
     ],
   },
 }
diff --git a/src/test/modules/test_misc/t/004_io_direct.pl b/src/test/modules/test_misc/t/004_io_direct.pl
new file mode 100644
index 0000000000..9a79fc8f9d
--- /dev/null
+++ b/src/test/modules/test_misc/t/004_io_direct.pl
@@ -0,0 +1,40 @@
+# Very simple exercise of direct I/O GUC.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Systems that we know to have direct I/O support, and whose typical local
+# filesystems support it or at least won't fail with an error.  (illumos should
+# probably be in this list, but perl reports it as solaris.  Solaris should not
+# be in the list because we don't support its way of turning on direct I/O, and
+# even if we did, its version of ZFS rejects it) and OpenBSD just doesn't have
+# it.)
+if (!grep { $^O eq $_ } qw(aix darwin dragonfly freebsd linux MSWin32 netbsd))
+{
+	plan skip_all => "no direct I/O support";
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->append_conf('io_direct', 'data,wal,wal_init');
+$node->append_conf('shared_buffers', '64kB'); # tiny to force I/O
+$node->start;
+
+# Do some work that is bound to generate shared and local writes and reads as a
+# simple exercise.
+$node->safe_psql('postgres', 'create table t1 as select 1 as i from generate_series(1, 10000)');
+$node->safe_psql('postgres', 'create table t2count (i int)');
+$node->safe_psql('postgres', 'begin; create temporary table t2 as select 1 as i from generate_series(1, 10000); update t2 set i = i; insert into t2count select count(*) from t2; commit;');
+$node->safe_psql('postgres', 'update t1 set i = i');
+is('10000', $node->safe_psql('postgres', 'select count(*) from t1'), "read back from shared");
+is('10000', $node->safe_psql('postgres', 'select * from t2count'), "read back from local");
+$node->stop('immediate');
+
+$node->start;
+is('10000', $node->safe_psql('postgres', 'select count(*) from t1'), "read back from shared after crash recovery");
+$node->stop;
+
+done_testing();
-- 
2.35.1

