From b7a4bd996a0a296eeda9a3938ff965131825cb9f Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 12 Feb 2025 13:52:22 +1300
Subject: [PATCH v3 2/3] Introduce max_io_combine_limit.

The existing io_combine_limit parameter can be set by users.  The new
max_io_combine_limit parameter can be set only at server startup time,
and applies as a silent limit on top of that.

Since our GUC system doesn't allow dependencies between GUCs, the
user-settable one assigns a "raw" value to io_combine_limit_guc, and the
minimum of io_combine_limit_guc and max_io_combine_limit is maintained
in io_combine_limit.  In other words, code should continue to refer to
io_combine_limit directly, and io_combine_limit_guc is an implementation
detail that could eventually be removed if the GUC system ever learns
how to deal with dependencies.

max_io_combine_limit allows the administrator to cap the total I/O size
system-wide, and also provides a way for proposed AIO patches to know
what the maximum could possibly be when allocating related memory at
server startup, without having to assume it is the compile-time maximum
MAX_IO_COMBINE_LIMIT.  This paves the way for increasing the latter.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKG%2B2T9p-%2BzM6Eeou-RAJjTML6eit1qn26f9twznX59qtCA%40mail.gmail.com
---
 doc/src/sgml/config.sgml                      | 23 ++++++++++++++++++-
 src/backend/commands/variable.c               | 18 +++++++++++++++
 src/backend/storage/buffer/bufmgr.c           |  5 +++-
 src/backend/utils/misc/guc_tables.c           | 16 ++++++++++++-
 src/backend/utils/misc/postgresql.conf.sample |  2 ++
 src/include/storage/bufmgr.h                  |  4 +++-
 src/include/utils/guc_hooks.h                 |  2 ++
 7 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e55700f35b8..68ba7cc7980 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2631,6 +2631,24 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
 
+      <varlistentry id="guc-max-io-combine-limit" xreflabel="max_io_combine_limit">
+       <term><varname>max_io_combine_limit</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>max_io_combine_limit</varname> configuration parameter</primary>
+       </indexterm>
+       </term>
+       <listitem>
+        <para>
+         Controls the largest I/O size in operations that combine I/O, and silently
+         limits the user-settable parameter <varname>io_combine_limit</varname>.
+         This parameter can only be set at server start; changing it
+         requires a restart, and it cannot be altered by reloading the
+         configuration or with <command>SET</command>.
+         The default is 128kB.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="guc-io-combine-limit" xreflabel="io_combine_limit">
        <term><varname>io_combine_limit</varname> (<type>integer</type>)
        <indexterm>
@@ -2639,7 +2657,10 @@ include_dir 'conf.d'
        </term>
        <listitem>
         <para>
-         Controls the largest I/O size in operations that combine I/O.
+         Controls the largest I/O size in operations that combine I/O.  If set
+         higher than the <varname>max_io_combine_limit</varname> parameter, the
+         smaller value will silently be used instead, so both may need to be raised
+         to increase the I/O size.
          The default is 128kB.
         </para>
        </listitem>
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
index 4ad6e236d69..fab8f2c7958 100644
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -1156,6 +1156,24 @@ assign_maintenance_io_concurrency(int newval, void *extra)
 #endif
 }
 
+/*
+ * Assign hooks for the two GUCs behind io_combine_limit.  GUCs can't depend
+ * on each other and may be set in either order, so each hook stores its raw
+ * value and then recomputes io_combine_limit as the smaller of the two.
+ */
+void
+assign_max_io_combine_limit(int newval, void *extra)
+{
+	max_io_combine_limit = newval;
+	io_combine_limit = Min(newval, io_combine_limit_guc);
+}
+
+void
+assign_io_combine_limit(int newval, void *extra)
+{
+	io_combine_limit_guc = newval;
+	io_combine_limit = Min(newval, max_io_combine_limit);
+}
 
 /*
  * These show hooks just exist because we want to show the values in octal.
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7915ed624c1..af97d08b0ae 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -160,9 +160,12 @@ int			maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY;
 /*
  * Limit on how many blocks should be handled in single I/O operations.
  * StartReadBuffers() callers should respect it, as should other operations
- * that call smgr APIs directly.
+ * that call smgr APIs directly.  It is computed as the minimum of underlying
+ * GUCs io_combine_limit_guc and max_io_combine_limit.
  */
 int			io_combine_limit = DEFAULT_IO_COMBINE_LIMIT;
+int			io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT;	/* raw setting */
+int			max_io_combine_limit = DEFAULT_IO_COMBINE_LIMIT;	/* startup-only cap */
 
 /*
  * GUC variables about triggering kernel writeback for buffers written; OS
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index ad25cbb39c5..d1cb93562d1 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3240,6 +3240,20 @@ struct config_int ConfigureNamesInt[] =
 		NULL
 	},
 
+	{
+		{"max_io_combine_limit",
+			PGC_POSTMASTER,
+			RESOURCES_IO,
+			gettext_noop("System-wide limit applied to io_combine_limit."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&max_io_combine_limit,
+		DEFAULT_IO_COMBINE_LIMIT,
+		1, MAX_IO_COMBINE_LIMIT,
+		NULL, assign_max_io_combine_limit, NULL
+	},
+
 	{
 		{"io_combine_limit",
 			PGC_USERSET,
@@ -3251,7 +3265,8 @@ struct config_int ConfigureNamesInt[] =
-		&io_combine_limit,
+		/* raw value; io_combine_limit itself is computed by the assign hook */
+		&io_combine_limit_guc,
 		DEFAULT_IO_COMBINE_LIMIT,
 		1, MAX_IO_COMBINE_LIMIT,
-		NULL, NULL, NULL
+		NULL, assign_io_combine_limit, NULL
 	},
 
 	{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 5362ff80519..1d63f9e5f36 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -200,6 +200,8 @@
 #backend_flush_after = 0		# measured in pages, 0 disables
 #effective_io_concurrency = 1		# 1-1000; 0 disables prefetching
 #maintenance_io_concurrency = 10	# 1-1000; 0 disables prefetching
+#max_io_combine_limit = 128kB		# usually 1-32 blocks (depends on OS)
+					# (change requires restart)
 #io_combine_limit = 128kB		# usually 1-32 blocks (depends on OS)
 
 # - Worker Processes -
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 7c1e4316dde..91a2f548c3a 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -163,7 +163,9 @@ extern PGDLLIMPORT int maintenance_io_concurrency;
 
 #define MAX_IO_COMBINE_LIMIT PG_IOV_MAX
 #define DEFAULT_IO_COMBINE_LIMIT Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
-extern PGDLLIMPORT int io_combine_limit;
+extern PGDLLIMPORT int io_combine_limit;	/* min of the two GUCs below */
+extern PGDLLIMPORT int io_combine_limit_guc;	/* raw io_combine_limit setting */
+extern PGDLLIMPORT int max_io_combine_limit;	/* cap, set at server start */
 
 extern PGDLLIMPORT int checkpoint_flush_after;
 extern PGDLLIMPORT int backend_flush_after;
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index 951451a9765..8f52af2fcee 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -82,6 +82,8 @@ extern const char *show_log_timezone(void);
 extern bool check_maintenance_io_concurrency(int *newval, void **extra,
 											 GucSource source);
 extern void assign_maintenance_io_concurrency(int newval, void *extra);
+extern void assign_max_io_combine_limit(int newval, void *extra);
+extern void assign_io_combine_limit(int newval, void *extra);
 extern bool check_max_slot_wal_keep_size(int *newval, void **extra,
 										 GucSource source);
 extern void assign_max_wal_size(int newval, void *extra);
-- 
2.48.1

