From 8607189eb19302c509eed78a7a2db55b9a2d70b3 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 31 May 2025 22:50:22 +1200
Subject: [PATCH 1/2] Add io_min_fallocate setting.

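BTRFS's compression is reported to be disabled by posix_fallocate(), so
offer a way to avoid calling it.  The previous coding used
posix_fallocate() instead of writing zeroes only when extending by more
than 8 blocks, so make that threshold configurable.  0 means never use
posix_fallocate(), and other numbers specify the minimum extension size
in blocks at which it is used, defaulting to 8.  (Note that an extension
of exactly 8 blocks now uses posix_fallocate(), where it previously
wrote zeroes.)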

Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
---
 doc/src/sgml/config.sgml                      | 17 +++++++++++++++++
 src/backend/storage/file/fd.c                 |  3 +++
 src/backend/storage/smgr/md.c                 |  6 ++----
 src/backend/utils/misc/guc_tables.c           | 12 ++++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/storage/fd.h                      |  1 +
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f4a0191c55b..7d476665f42 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2684,6 +2684,23 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
 
+      <varlistentry id="guc-io-min-fallocate" xreflabel="io_min_fallocate">
+       <term><varname>io_min_fallocate</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>io_min_fallocate</varname> configuration parameter</primary>
+       </indexterm>
+       </term>
+       <listitem>
+        <para>
+         Minimum number of blocks by which a data file must be extended for
+         <function>posix_fallocate()</function> to be used instead of
+         writing zeroes.  <literal>0</literal> means never (always write
+         zeroes).  If specified without units, the value is taken as blocks.
+         The default is <literal>8</literal>.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="guc-io-max-combine-limit" xreflabel="io_max_combine_limit">
        <term><varname>io_max_combine_limit</varname> (<type>integer</type>)
        <indexterm>
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..ff16b5cc6bd 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -164,6 +164,9 @@ bool		data_sync_retry = false;
 /* How SyncDataDirectory() should do its job. */
 int			recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
+/* Min. extension in blocks for FileFallocate() over FileZero(); 0 = never. */
+int			io_min_fallocate = 8;
+
 /* Which kinds of files should be opened with PG_O_DIRECT. */
 int			io_direct_flags;
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 2ccb0faceb5..6d1b9cb65b2 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -588,11 +588,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		 * to allocate page cache space for the extended pages.
 		 *
 		 * However, we don't use FileFallocate() for small extensions, as it
-		 * defeats delayed allocation on some filesystems. Not clear where
-		 * that decision should be made though? For now just use a cutoff of
-		 * 8, anything between 4 and 8 worked OK in some local testing.
+		 * defeats delayed allocation on some filesystems.
 		 */
-		if (numblocks > 8)
+		if (io_min_fallocate > 0 && numblocks >= io_min_fallocate)
 		{
 			int			ret;
 
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..a75ff8623d9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3265,6 +3265,18 @@ struct config_int ConfigureNamesInt[] =
 		NULL
 	},
 
+	{
+		{"io_min_fallocate",
+			PGC_USERSET,
+			RESOURCES_IO,
+			gettext_noop("Threshold for preferring posix_fallocate() when extending data files."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&io_min_fallocate,
+		8, 0, INT_MAX
+	},
+
 	{
 		{"io_max_combine_limit",
 			PGC_POSTMASTER,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 87ce76b18f4..8b712ef244f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -200,6 +200,7 @@
 #backend_flush_after = 0		# measured in pages, 0 disables
 #effective_io_concurrency = 16		# 1-1000; 0 disables issuing multiple simultaneous IO requests
 #maintenance_io_concurrency = 16	# 1-1000; same as effective_io_concurrency
+#io_min_fallocate = 8			# min extension size in blocks at which to prefer posix_fallocate, 0 = never
 #io_max_combine_limit = 128kB		# usually 1-128 blocks (depends on OS)
 					# (change requires restart)
 #io_combine_limit = 128kB		# usually 1-128 blocks (depends on OS)
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index b77d8e5e30e..b8714e5ceb8 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -60,6 +60,7 @@ typedef int File;
 extern PGDLLIMPORT int max_files_per_process;
 extern PGDLLIMPORT bool data_sync_retry;
 extern PGDLLIMPORT int recovery_init_sync_method;
+extern PGDLLIMPORT int io_min_fallocate;
 extern PGDLLIMPORT int io_direct_flags;
 
 /*
-- 
2.39.5

