From ea72565d754cc7d0e7b89dedd7d97608c73172dd Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 18 Mar 2025 14:40:06 -0400
Subject: [PATCH v2.10 12/28] aio: Basic read_stream adjustments for real AIO

Adapt the read stream logic for real AIO:
- If AIO is enabled (io_method is not "sync"), we shouldn't issue prefetch
  advice; with synchronous IO we should continue issuing it
- AIO benefits from reading ahead with direct IO
- While in read_stream_look_ahead(), we can use AIO batch submission mode for
  increased efficiency

There is one comment talking about max_ios logic with "real asynchronous I/O"
that I am not sure about, so I left the logic alone for now and only added a
FIXME note to that comment.

There are further improvements we should consider, e.g. waiting to issue IOs
until we can issue multiple IOs at once. But that's left for a future change,
since it would involve additional heuristics.
---
 src/backend/storage/aio/read_stream.c | 29 ++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index cdf4b5a86a2..d7b395b86a3 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -72,6 +72,7 @@
 #include "postgres.h"
 
 #include "miscadmin.h"
+#include "storage/aio.h"
 #include "storage/fd.h"
 #include "storage/smgr.h"
 #include "storage/read_stream.h"
@@ -99,6 +100,7 @@ struct ReadStream
 	int16		pinned_buffers;
 	int16		distance;
 	int16		initialized_buffers;
+	bool		sync_mode;
 	bool		advice_enabled;
 	bool		temporary;
 
@@ -416,6 +418,13 @@ read_stream_start_pending_read(ReadStream *stream)
 static void
 read_stream_look_ahead(ReadStream *stream)
 {
+	/*
+	 * Allow amortizing the cost of submitting IO over multiple IOs. This
+	 * requires that we don't do any operations that could lead to a deadlock
+	 * with staged-but-unsubmitted IO.
+	 */
+	pgaio_enter_batchmode();
+
 	while (stream->ios_in_progress < stream->max_ios &&
 		   stream->pinned_buffers + stream->pending_read_nblocks < stream->distance)
 	{
@@ -463,6 +472,7 @@ read_stream_look_ahead(ReadStream *stream)
 			{
 				/* We've hit the buffer or I/O limit.  Rewind and stop here. */
 				read_stream_unget_block(stream, blocknum);
+				pgaio_exit_batchmode();
 				return;
 			}
 		}
@@ -497,6 +507,8 @@ read_stream_look_ahead(ReadStream *stream)
 	 * time.
 	 */
 	Assert(stream->pinned_buffers > 0 || stream->distance == 0);
+
+	pgaio_exit_batchmode();
 }
 
 /*
@@ -628,15 +640,19 @@ read_stream_begin_impl(int flags,
 		stream->per_buffer_data = (void *)
 			MAXALIGN(&stream->ios[Max(1, max_ios)]);
 
+	stream->sync_mode = io_method == IOMETHOD_SYNC;
+
 #ifdef USE_PREFETCH
 
 	/*
-	 * This system supports prefetching advice.  We can use it as long as
-	 * direct I/O isn't enabled, the caller hasn't promised sequential access
-	 * (overriding our detection heuristics), and max_ios hasn't been set to
-	 * zero.
+	 * This system supports prefetching advice.
+	 *
+	 * Issue advice only if AIO is not used, direct I/O isn't enabled, the
+	 * caller hasn't promised sequential access (overriding our detection
+	 * heuristics), and max_ios hasn't been set to zero.
 	 */
-	if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
+	if (stream->sync_mode &&
+		(io_direct_flags & IO_DIRECT_DATA) == 0 &&
 		(flags & READ_STREAM_SEQUENTIAL) == 0 &&
 		max_ios > 0)
 		stream->advice_enabled = true;
@@ -646,6 +662,9 @@ read_stream_begin_impl(int flags,
 	 * For now, max_ios = 0 is interpreted as max_ios = 1 with advice disabled
 	 * above.  If we had real asynchronous I/O we might need a slightly
 	 * different definition.
+	 *
+	 * FIXME: It's unclear what different definition we would need. Perhaps we
+	 * could set the READ_BUFFERS_SYNCHRONOUSLY flag automatically?
 	 */
 	if (max_ios == 0)
 		max_ios = 1;
-- 
2.48.1.76.g4e746b1a31.dirty

