From 5e84720afa46fdfd892a8bac36585f0f7a29d3f3 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 22 Jan 2025 13:43:40 -0500
Subject: [PATCH v2.3 12/30] aio: Skeleton IO worker infrastructure

This doesn't do anything useful on its own, but the code that needs to be
touched is independent of other changes.

Remarks:
- should completely get rid of ID assignment logic in postmaster.c
- postmaster.c badly needs a refactoring.
- dynamic increase / decrease of workers based on IO load

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/include/miscadmin.h                       |   2 +
 src/include/postmaster/postmaster.h           |   1 +
 src/include/storage/aio_init.h                |   2 +
 src/include/storage/io_worker.h               |  22 +++
 src/include/storage/proc.h                    |   4 +-
 src/backend/postmaster/launch_backend.c       |   2 +
 src/backend/postmaster/pmchild.c              |   1 +
 src/backend/postmaster/postmaster.c           | 169 ++++++++++++++++--
 src/backend/storage/aio/Makefile              |   1 +
 src/backend/storage/aio/aio_init.c            |   7 +
 src/backend/storage/aio/meson.build           |   1 +
 src/backend/storage/aio/method_worker.c       |  86 +++++++++
 src/backend/tcop/postgres.c                   |   2 +
 src/backend/utils/activity/pgstat_backend.c   |   1 +
 src/backend/utils/activity/pgstat_io.c        |   1 +
 .../utils/activity/wait_event_names.txt       |   1 +
 src/backend/utils/init/miscinit.c             |   3 +
 src/backend/utils/misc/guc_tables.c           |  13 ++
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 19 files changed, 305 insertions(+), 15 deletions(-)
 create mode 100644 src/include/storage/io_worker.h
 create mode 100644 src/backend/storage/aio/method_worker.c

diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index d016a9c9248..c2b3e27c613 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -360,6 +360,7 @@ typedef enum BackendType
 	B_ARCHIVER,
 	B_BG_WRITER,
 	B_CHECKPOINTER,
+	B_IO_WORKER,
 	B_STARTUP,
 	B_WAL_RECEIVER,
 	B_WAL_SUMMARIZER,
@@ -389,6 +390,7 @@ extern PGDLLIMPORT BackendType MyBackendType;
 #define AmWalReceiverProcess()		(MyBackendType == B_WAL_RECEIVER)
 #define AmWalSummarizerProcess()	(MyBackendType == B_WAL_SUMMARIZER)
 #define AmWalWriterProcess()		(MyBackendType == B_WAL_WRITER)
+#define AmIoWorkerProcess()			(MyBackendType == B_IO_WORKER)
 
 #define AmSpecialWorkerProcess() \
 	(AmAutoVacuumLauncherProcess() || \
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index 188a06e2379..253dc98c50e 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -98,6 +98,7 @@ extern void InitProcessGlobals(void);
 extern int	MaxLivePostmasterChildren(void);
 
 extern bool PostmasterMarkPIDForWorkerNotify(int);
+extern void assign_io_workers(int newval, void *extra);
 
 #ifdef WIN32
 extern void pgwin32_register_deadchild_callback(HANDLE procHandle, DWORD procId);
diff --git a/src/include/storage/aio_init.h b/src/include/storage/aio_init.h
index 44151ef55bf..bc15b720fca 100644
--- a/src/include/storage/aio_init.h
+++ b/src/include/storage/aio_init.h
@@ -21,4 +21,6 @@ extern void AioShmemInit(void);
 
 extern void pgaio_init_backend(void);
 
+extern bool pgaio_workers_enabled(void);
+
 #endif							/* AIO_INIT_H */
diff --git a/src/include/storage/io_worker.h b/src/include/storage/io_worker.h
new file mode 100644
index 00000000000..223d614dc4a
--- /dev/null
+++ b/src/include/storage/io_worker.h
@@ -0,0 +1,22 @@
+/*-------------------------------------------------------------------------
+ *
+ * io_worker.h
+ *    IO worker for implementing AIO "ourselves"
+ *
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/io_worker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef IO_WORKER_H
+#define IO_WORKER_H
+
+/* Main entry point of an IO worker process; does not return. */
+extern void IoWorkerMain(char *startup_data, size_t startup_data_len) pg_attribute_noreturn();
+
+extern PGDLLIMPORT int io_workers;	/* GUC: number of IO worker processes */
+
+#endif							/* IO_WORKER_H */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 20777f7d5ae..64e9b8ff8c5 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -448,7 +448,9 @@ extern PGDLLIMPORT PGPROC *PreparedXactProcs;
  * 2 slots, but WAL writer is launched only after startup has exited, so we
  * only need 6 slots.
  */
-#define NUM_AUXILIARY_PROCS		6
+#define MAX_IO_WORKERS          32
+#define NUM_AUXILIARY_PROCS		(6 + MAX_IO_WORKERS)
+
 
 /* configurable options */
 extern PGDLLIMPORT int DeadlockTimeout;
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6da..54b4c22bd63 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -48,6 +48,7 @@
 #include "replication/slotsync.h"
 #include "replication/walreceiver.h"
 #include "storage/dsm.h"
+#include "storage/io_worker.h"
 #include "storage/pg_shmem.h"
 #include "tcop/backend_startup.h"
 #include "utils/memutils.h"
@@ -197,6 +198,7 @@ static child_process_kind child_process_kinds[] = {
 	[B_ARCHIVER] = {"archiver", PgArchiverMain, true},
 	[B_BG_WRITER] = {"bgwriter", BackgroundWriterMain, true},
 	[B_CHECKPOINTER] = {"checkpointer", CheckpointerMain, true},
+	[B_IO_WORKER] = {"io_worker", IoWorkerMain, true},
 	[B_STARTUP] = {"startup", StartupProcessMain, true},
 	[B_WAL_RECEIVER] = {"wal_receiver", WalReceiverMain, true},
 	[B_WAL_SUMMARIZER] = {"wal_summarizer", WalSummarizerMain, true},
diff --git a/src/backend/postmaster/pmchild.c b/src/backend/postmaster/pmchild.c
index 0d473226c3a..cde1d23a4ca 100644
--- a/src/backend/postmaster/pmchild.c
+++ b/src/backend/postmaster/pmchild.c
@@ -101,6 +101,7 @@ InitPostmasterChildSlots(void)
 
 	pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_worker_slots;
 	pmchild_pools[B_BG_WORKER].size = max_worker_processes;
+	pmchild_pools[B_IO_WORKER].size = MAX_IO_WORKERS;
 
 	/*
 	 * There can be only one of each of these running at a time.  They each
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 115ad3d31d2..ddd82b94720 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -108,9 +108,12 @@
 #include "replication/logicallauncher.h"
 #include "replication/slotsync.h"
 #include "replication/walsender.h"
+#include "storage/aio_init.h"
 #include "storage/fd.h"
+#include "storage/io_worker.h"
 #include "storage/ipc.h"
 #include "storage/pmsignal.h"
+#include "storage/proc.h"
 #include "tcop/backend_startup.h"
 #include "tcop/tcopprot.h"
 #include "utils/datetime.h"
@@ -334,6 +337,7 @@ typedef enum
 								 * ckpt */
 	PM_WAIT_XLOG_ARCHIVAL,		/* waiting for archiver and walsenders to
 								 * finish */
+	PM_WAIT_IO_WORKERS,			/* waiting for io workers to exit */
 	PM_WAIT_CHECKPOINTER,		/* waiting for checkpointer to shut down */
 	PM_WAIT_DEAD_END,			/* waiting for dead-end children to exit */
 	PM_NO_CHILDREN,				/* all important children have exited */
@@ -396,6 +400,10 @@ bool		LoadedSSL = false;
 static DNSServiceRef bonjour_sdref = NULL;
 #endif
 
+/* State for IO worker management. */
+static int	io_worker_count = 0;
+static PMChild *io_worker_children[MAX_IO_WORKERS];
+
 /*
  * postmaster.c - function prototypes
  */
@@ -430,6 +438,8 @@ static void TerminateChildren(int signal);
 static int	CountChildren(BackendTypeMask targetMask);
 static void LaunchMissingBackgroundProcesses(void);
 static void maybe_start_bgworkers(void);
+static bool maybe_reap_io_worker(int pid);
+static void maybe_adjust_io_workers(void);
 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 static PMChild *StartChildProcess(BackendType type);
 static void StartSysLogger(void);
@@ -1357,6 +1367,11 @@ PostmasterMain(int argc, char *argv[])
 	 */
 	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
 
+	UpdatePMState(PM_STARTUP);
+
+	/* Make sure we can perform I/O while starting up. */
+	maybe_adjust_io_workers();
+
 	/* Start bgwriter and checkpointer so they can help with recovery */
 	if (CheckpointerPMChild == NULL)
 		CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
@@ -1369,7 +1384,6 @@ PostmasterMain(int argc, char *argv[])
 	StartupPMChild = StartChildProcess(B_STARTUP);
 	Assert(StartupPMChild != NULL);
 	StartupStatus = STARTUP_RUNNING;
-	UpdatePMState(PM_STARTUP);
 
 	/* Some workers may be scheduled to start now */
 	maybe_start_bgworkers();
@@ -2493,6 +2507,16 @@ process_pm_child_exit(void)
 			continue;
 		}
 
+		/* Was it an IO worker? */
+		if (maybe_reap_io_worker(pid))
+		{
+			if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+				HandleChildCrash(pid, exitstatus, _("io worker"));
+
+			maybe_adjust_io_workers();
+			continue;
+		}
+
 		/*
 		 * Was it a backend or a background worker?
 		 */
@@ -2704,6 +2728,7 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt)
 		case PM_WAIT_XLOG_SHUTDOWN:
 		case PM_WAIT_XLOG_ARCHIVAL:
 		case PM_WAIT_CHECKPOINTER:
+		case PM_WAIT_IO_WORKERS:
 
 			/*
 			 * Note that we switch *back* to PM_WAIT_BACKENDS here. This way
@@ -2892,20 +2917,21 @@ PostmasterStateMachine(void)
 
 		/*
 		 * If we are doing crash recovery or an immediate shutdown then we
-		 * expect archiver, checkpointer and walsender to exit as well,
-		 * otherwise not.
+		 * expect archiver, checkpointer, io workers and walsender to exit as
+		 * well, otherwise not.
 		 */
 		if (FatalError || Shutdown >= ImmediateShutdown)
 			targetMask = btmask_add(targetMask,
 									B_CHECKPOINTER,
 									B_ARCHIVER,
+									B_IO_WORKER,
 									B_WAL_SENDER);
 
 		/*
-		 * Normally walsenders and archiver will continue running; they will
-		 * be terminated later after writing the checkpoint record.  We also
-		 * let dead-end children to keep running for now.  The syslogger
-		 * process exits last.
+		 * Normally archiver, checkpointer, IO workers and walsenders will
+		 * continue running; they will be terminated later after writing the
+		 * checkpoint record.  We also let dead-end children keep running
+		 * for now.  The syslogger process exits last.
 		 *
 		 * This assertion checks that we have covered all backend types,
 		 * either by including them in targetMask, or by noting here that they
@@ -2920,12 +2946,13 @@ PostmasterStateMachine(void)
 									B_LOGGER);
 
 			/*
-			 * Archiver, checkpointer and walsender may or may not be in
-			 * targetMask already.
+			 * Archiver, checkpointer, IO workers, and walsender may or may
+			 * not be in targetMask already.
 			 */
 			remainMask = btmask_add(remainMask,
 									B_ARCHIVER,
 									B_CHECKPOINTER,
+									B_IO_WORKER,
 									B_WAL_SENDER);
 
 			/* these are not real postmaster children */
@@ -3020,11 +3047,25 @@ PostmasterStateMachine(void)
 	{
 		/*
 		 * PM_WAIT_XLOG_ARCHIVAL state ends when there's no children other
-		 * than checkpointer and dead-end children left. There shouldn't be
-		 * any regular backends left by now anyway; what we're really waiting
-		 * for is for walsenders and archiver to exit.
+		 * than checkpointer, io workers and dead-end children left. There
+		 * shouldn't be any regular backends left by now anyway; what we're
+		 * really waiting for is for walsenders and archiver to exit.
 		 */
-		if (CountChildren(btmask_all_except(B_CHECKPOINTER, B_LOGGER, B_DEAD_END_BACKEND)) == 0)
+		if (CountChildren(btmask_all_except(B_CHECKPOINTER, B_IO_WORKER,
+											B_LOGGER, B_DEAD_END_BACKEND)) == 0)
+		{
+			UpdatePMState(PM_WAIT_IO_WORKERS);
+			SignalChildren(SIGUSR2, btmask(B_IO_WORKER));
+		}
+	}
+
+	if (pmState == PM_WAIT_IO_WORKERS)
+	{
+		/*
+		 * PM_WAIT_IO_WORKERS state ends when there's only checkpointer and
+		 * dead-end children left.
+		 */
+		if (io_worker_count == 0)
 		{
 			UpdatePMState(PM_WAIT_CHECKPOINTER);
 
@@ -3151,10 +3192,14 @@ PostmasterStateMachine(void)
 		/* re-create shared memory and semaphores */
 		CreateSharedMemoryAndSemaphores();
 
+		UpdatePMState(PM_STARTUP);
+
+		/* Make sure we can perform I/O while starting up. */
+		maybe_adjust_io_workers();
+
 		StartupPMChild = StartChildProcess(B_STARTUP);
 		Assert(StartupPMChild != NULL);
 		StartupStatus = STARTUP_RUNNING;
-		UpdatePMState(PM_STARTUP);
 		/* crash recovery started, reset SIGKILL flag */
 		AbortStartTime = 0;
 
@@ -3178,6 +3223,7 @@ pmstate_name(PMState state)
 			PM_TOSTR_CASE(PM_WAIT_BACKENDS);
 			PM_TOSTR_CASE(PM_WAIT_XLOG_SHUTDOWN);
 			PM_TOSTR_CASE(PM_WAIT_XLOG_ARCHIVAL);
+			PM_TOSTR_CASE(PM_WAIT_IO_WORKERS);
 			PM_TOSTR_CASE(PM_WAIT_DEAD_END);
 			PM_TOSTR_CASE(PM_WAIT_CHECKPOINTER);
 			PM_TOSTR_CASE(PM_NO_CHILDREN);
@@ -4093,6 +4139,7 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
 		case PM_WAIT_DEAD_END:
 		case PM_WAIT_XLOG_ARCHIVAL:
 		case PM_WAIT_XLOG_SHUTDOWN:
+		case PM_WAIT_IO_WORKERS:
 		case PM_WAIT_BACKENDS:
 		case PM_STOP_BACKENDS:
 			break;
@@ -4243,6 +4290,100 @@ maybe_start_bgworkers(void)
 	}
 }
 
+/* If pid is one of our IO workers, release its slot and return true. */
+static bool
+maybe_reap_io_worker(int pid)
+{
+	for (int slot = 0; slot < MAX_IO_WORKERS; slot++)
+	{
+		PMChild    *worker = io_worker_children[slot];
+
+		if (worker == NULL || worker->pid != pid)
+			continue;
+		ReleasePostmasterChildSlot(worker);
+		io_worker_children[slot] = NULL;
+		--io_worker_count;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Bring the number of running IO workers in line with the io_workers setting,
+ * launching missing workers or asking one surplus worker to exit.
+ *
+ * Does nothing if worker-based AIO is disabled or the postmaster is in one of
+ * the shutdown situations checked below.
+ */
+static void
+maybe_adjust_io_workers(void)
+{
+	int			slot;
+
+	/* Nothing to manage unless worker-based AIO is in use. */
+	if (!pgaio_workers_enabled())
+		return;
+
+	/*
+	 * Leave the pool alone when (a) we've reached the final shutdown states
+	 * and are only waiting for processes to exit, (b) an immediate shutdown
+	 * is in progress, or (c) we're in the shutdown phase of a crash restart.
+	 * Once a crash restart is starting up again, workers *are* needed.
+	 */
+	if (pmState >= PM_WAIT_IO_WORKERS ||
+		Shutdown >= ImmediateShutdown ||
+		(FatalError && pmState >= PM_STOP_BACKENDS))
+		return;
+
+	Assert(pmState < PM_WAIT_IO_WORKERS);
+
+	/* Too few running: launch workers until the target is reached. */
+	while (io_worker_count < io_workers)
+	{
+		PMChild    *worker;
+
+		/* Locate the first free entry of io_worker_children. */
+		slot = 0;
+		while (slot < MAX_IO_WORKERS && io_worker_children[slot] != NULL)
+			slot++;
+		if (slot == MAX_IO_WORKERS)
+			elog(ERROR, "could not find a free IO worker ID");
+
+		/* Try to launch one; stop trying for now if that fails. */
+		worker = StartChildProcess(B_IO_WORKER);
+		if (worker == NULL)
+			break;				/* XXX try again soon? */
+
+		io_worker_children[slot] = worker;
+		++io_worker_count;
+	}
+
+	/* Done unless there are surplus workers. */
+	if (io_worker_count <= io_workers)
+		return;
+
+	/*
+	 * Too many running: send SIGUSR2 to the worker in the highest occupied
+	 * slot only.  process_pm_child_exit() calls us again once it has exited.
+	 */
+	for (slot = MAX_IO_WORKERS - 1; slot >= 0; slot--)
+	{
+		if (io_worker_children[slot] == NULL)
+			continue;
+		kill(io_worker_children[slot]->pid, SIGUSR2);
+		break;
+	}
+}
+
+/* Assign hook for io_workers; past PM_INIT the postmaster adjusts the pool. */
+void
+assign_io_workers(int newval, void *extra)
+{
+	io_workers = newval;		/* read by maybe_adjust_io_workers() below */
+	if (pmState > PM_INIT && !IsUnderPostmaster)
+		maybe_adjust_io_workers();
+}
+
 /*
  * When a backend asks to be notified about worker state changes, we
  * set a flag in its backend entry.  The background worker machinery needs
diff --git a/src/backend/storage/aio/Makefile b/src/backend/storage/aio/Makefile
index 89f821ea7e1..f51c34a37f8 100644
--- a/src/backend/storage/aio/Makefile
+++ b/src/backend/storage/aio/Makefile
@@ -15,6 +15,7 @@ OBJS = \
 	aio_io.o \
 	aio_target.o \
 	method_sync.o \
+	method_worker.o \
 	read_stream.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/aio/aio_init.c b/src/backend/storage/aio/aio_init.c
index 0e98cc0c8fb..233c144965b 100644
--- a/src/backend/storage/aio/aio_init.c
+++ b/src/backend/storage/aio/aio_init.c
@@ -221,3 +221,10 @@ pgaio_init_backend(void)
 
 	before_shmem_exit(pgaio_shutdown, 0);
 }
+
+bool
+pgaio_workers_enabled(void)
+{
+	/* Placeholder for a future commit: IO workers are never enabled yet. */
+	return false;
+}
diff --git a/src/backend/storage/aio/meson.build b/src/backend/storage/aio/meson.build
index 2c26089d52e..74f94c6e40b 100644
--- a/src/backend/storage/aio/meson.build
+++ b/src/backend/storage/aio/meson.build
@@ -7,5 +7,6 @@ backend_sources += files(
   'aio_io.c',
   'aio_target.c',
   'method_sync.c',
+  'method_worker.c',
   'read_stream.c',
 )
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
new file mode 100644
index 00000000000..1d79e7e85ef
--- /dev/null
+++ b/src/backend/storage/aio/method_worker.c
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * method_worker.c
+ *    AIO implementation using workers
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/storage/aio/method_worker.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "postmaster/auxprocess.h"
+#include "postmaster/interrupt.h"
+#include "storage/io_worker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/proc.h"
+#include "tcop/tcopprot.h"
+#include "utils/wait_event.h"
+
+
+int			io_workers = 3;
+
+/* Main entry point of an IO worker process; leaves only via proc_exit(). */
+void
+IoWorkerMain(char *startup_data, size_t startup_data_len)
+{
+	sigjmp_buf	local_sigjmp_buf;
+
+	MyBackendType = B_IO_WORKER;
+	AuxiliaryProcessMainCommon();
+
+	/* TODO review all signals (SIGHUP is caught, but config isn't reloaded) */
+	pqsignal(SIGHUP, SignalHandlerForConfigReload);
+	pqsignal(SIGINT, die);		/* to allow manually triggering worker restart */
+
+	/*
+	 * Ignore SIGTERM, will get explicit shutdown via SIGUSR2 later in the
+	 * shutdown sequence, similar to checkpointer.
+	 */
+	pqsignal(SIGTERM, SIG_IGN);
+	/* SIGQUIT handler was already set up by InitPostmasterChild */
+	pqsignal(SIGALRM, SIG_IGN);
+	pqsignal(SIGPIPE, SIG_IGN);
+	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+	pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
+	sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
+
+	/* Error recovery point for ereport(ERROR); see PostgresMain() */
+	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	{
+		error_context_stack = NULL;
+		HOLD_INTERRUPTS();
+
+		/*
+		 * We normally shouldn't get errors here. Need to do just enough error
+		 * recovery so that we can mark the IO as failed and then exit.
+		 */
+		LWLockReleaseAll();
+
+		/* TODO: recover from IO errors */
+
+		EmitErrorReport();
+		proc_exit(1);
+	}
+
+	/* We can now handle ereport(ERROR) */
+	PG_exception_stack = &local_sigjmp_buf;
+	/* Skeleton loop: no IO is handled yet, just wait for shutdown/interrupts. */
+	while (!ShutdownRequestPending)
+	{
+		WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1,
+				  WAIT_EVENT_IO_WORKER_MAIN);
+		ResetLatch(MyLatch);
+		CHECK_FOR_INTERRUPTS();
+	}
+
+	proc_exit(0);
+}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5655348a2e2..605c8950043 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3313,6 +3313,8 @@ ProcessInterrupts(void)
 					(errcode(ERRCODE_ADMIN_SHUTDOWN),
 					 errmsg("terminating background worker \"%s\" due to administrator command",
 							MyBgworkerEntry->bgw_type)));
+		else if (AmIoWorkerProcess())
+			proc_exit(0);
 		else
 			ereport(FATAL,
 					(errcode(ERRCODE_ADMIN_SHUTDOWN),
diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index bcf9e4b1487..b2151ab4ca3 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -241,6 +241,7 @@ pgstat_tracks_backend_bktype(BackendType bktype)
 		case B_WAL_SUMMARIZER:
 		case B_BG_WRITER:
 		case B_CHECKPOINTER:
+		case B_IO_WORKER:
 		case B_STARTUP:
 			return false;
 
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index 6ff5d9e96a1..70518749142 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -365,6 +365,7 @@ pgstat_tracks_io_bktype(BackendType bktype)
 		case B_BG_WORKER:
 		case B_BG_WRITER:
 		case B_CHECKPOINTER:
+		case B_IO_WORKER:
 		case B_SLOTSYNC_WORKER:
 		case B_STANDALONE_BACKEND:
 		case B_STARTUP:
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index b5d3dcbf1e9..e702aa7152a 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -57,6 +57,7 @@ BGWRITER_HIBERNATE	"Waiting in background writer process, hibernating."
 BGWRITER_MAIN	"Waiting in main loop of background writer process."
 CHECKPOINTER_MAIN	"Waiting in main loop of checkpointer process."
 CHECKPOINTER_SHUTDOWN	"Waiting for checkpointer process to be terminated."
+IO_WORKER_MAIN	"Waiting in main loop of IO Worker process."
 LOGICAL_APPLY_MAIN	"Waiting in main loop of logical replication apply process."
 LOGICAL_LAUNCHER_MAIN	"Waiting in main loop of logical replication launcher process."
 LOGICAL_PARALLEL_APPLY_MAIN	"Waiting in main loop of logical replication parallel apply process."
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 0347fc11092..cbca090d2b0 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -293,6 +293,9 @@ GetBackendTypeDesc(BackendType backendType)
 		case B_CHECKPOINTER:
 			backendDesc = gettext_noop("checkpointer");
 			break;
+		case B_IO_WORKER:
+			backendDesc = gettext_noop("io worker");
+			break;
 		case B_LOGGER:
 			backendDesc = gettext_noop("logger");
 			break;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index de524eccad5..8a83dcc820d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -74,6 +74,7 @@
 #include "storage/aio.h"
 #include "storage/bufmgr.h"
 #include "storage/bufpage.h"
+#include "storage/io_worker.h"
 #include "storage/large_object.h"
 #include "storage/pg_shmem.h"
 #include "storage/predicate.h"
@@ -3233,6 +3234,18 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"io_workers",
+			PGC_SIGHUP,
+			RESOURCES_ASYNCHRONOUS,
+			gettext_noop("Number of IO worker processes, for io_method=worker."),
+			NULL,
+		},
+		&io_workers,
+		3, 1, MAX_IO_WORKERS,
+		NULL, assign_io_workers, NULL
+	},
+
 	{
 		{"backend_flush_after", PGC_USERSET, RESOURCES_ASYNCHRONOUS,
 			gettext_noop("Number of pages after which previously performed writes are flushed to disk."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index fba0ad4b624..e68e112c72f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -848,6 +848,7 @@ autovacuum_worker_slots = 16	# autovacuum worker slots to allocate
 #------------------------------------------------------------------------------
 
 #io_method = sync			# (change requires restart)
+#io_workers = 3				# 1-32
 
 #io_max_concurrency = 32		# Max number of IOs that may be in
 					# flight at the same time in one backend
-- 
2.48.1.76.g4e746b1a31.dirty

