From d549d3c0bac4a6e609781775e63277b370eb48ff Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 6 Apr 2024 13:28:28 +1300
Subject: [PATCH] Add READ_STREAM_OUT_OF_ORDER.

Just a proof-of-concept.
---
 src/backend/storage/aio/read_stream.c | 42 +++++++++++++++++++++++----
 src/include/storage/read_stream.h     |  7 +++++
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 2e45dcdcd4b..c1b95a42d39 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -115,6 +115,7 @@ struct ReadStream
 	int16		pinned_buffers;
 	int16		distance;
 	bool		advice_enabled;
+	bool		out_of_order_enabled;
 
 	/*
 	 * Small buffer of block numbers, useful for 'ungetting' to resolve flow
@@ -307,12 +308,38 @@ read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
 				&stream->buffers[stream->queue_size],
 				sizeof(stream->buffers[0]) * overflow);
 
-	/* Compute location of start of next read, without using % operator. */
-	buffer_index += nblocks;
-	if (buffer_index >= stream->queue_size)
-		buffer_index -= stream->queue_size;
-	Assert(buffer_index >= 0 && buffer_index < stream->queue_size);
-	stream->next_buffer_index = buffer_index;
+	/*
+	 * If this was a hit and there is already I/O in progress, can we
+	 * re-prioritize it so that I/O has longer to complete and the caller can
+	 * consume this buffer immediately?
+	 */
+	if (!need_wait &&
+		nblocks == 1 &&
+		stream->ios_in_progress > 0 &&
+		stream->out_of_order_enabled)
+	{
+		int16		oldest_buffer_index;
+
+		/* Insert before "oldest" buffer. */
+		oldest_buffer_index = stream->oldest_buffer_index - 1;
+		if (oldest_buffer_index < 0)
+			oldest_buffer_index += stream->queue_size;
+		stream->buffers[oldest_buffer_index] = stream->buffers[buffer_index];
+		if (stream->per_buffer_data_size > 0)
+			memcpy(get_per_buffer_data(stream, oldest_buffer_index),
+				   get_per_buffer_data(stream, buffer_index),
+				   stream->per_buffer_data_size);
+		stream->oldest_buffer_index = oldest_buffer_index;
+	}
+	else
+	{
+		/* Compute location of start of next read, without using % operator. */
+		buffer_index += nblocks;
+		if (buffer_index >= stream->queue_size)
+			buffer_index -= stream->queue_size;
+		Assert(buffer_index >= 0 && buffer_index < stream->queue_size);
+		stream->next_buffer_index = buffer_index;
+	}
 
 	/* Adjust the pending read to cover the remaining portion, if any. */
 	stream->pending_read_blocknum += nblocks;
@@ -515,6 +542,9 @@ read_stream_begin_relation(int flags,
 		stream->advice_enabled = true;
 #endif
 
+	if (flags & READ_STREAM_OUT_OF_ORDER)
+		stream->out_of_order_enabled = true;
+
 	/*
 	 * For now, max_ios = 0 is interpreted as max_ios = 1 with advice disabled
 	 * above.  If we had real asynchronous I/O we might need a slightly
diff --git a/src/include/storage/read_stream.h b/src/include/storage/read_stream.h
index fae09d2b4cc..d23d51af00c 100644
--- a/src/include/storage/read_stream.h
+++ b/src/include/storage/read_stream.h
@@ -41,6 +41,13 @@
  */
 #define READ_STREAM_FULL 0x04
 
+/*
+ * We usually stream buffers in the order the callback generates block
+ * numbers, but if the caller can cope with it, there are sometimes
+ * opportunities to reorder blocks to reduce I/O stalls.
+ */
+#define READ_STREAM_OUT_OF_ORDER 0x08
+
 struct ReadStream;
 typedef struct ReadStream ReadStream;
 
-- 
2.44.0

