From 74cff50281b53098af676c38e65b1c8fce2ef0bd Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 25 Mar 2026 14:42:39 -0400
Subject: [PATCH v8 2/9] test_aio: Add basic tests for StartReadBuffers()

Upcoming commits will change StartReadBuffers() and its building blocks,
making it worthwhile to have tests explicitly of StartReadBuffers().

Discussion: https://postgr.es/m/zljergweqti7x67lg5ije2rzjusie37nslsnkjkkby4laqqbfw@3p3zu522yykv
---
 src/test/modules/test_aio/t/001_aio.pl      | 239 ++++++++++++++++++++
 src/test/modules/test_aio/test_aio--1.0.sql |   7 +
 src/test/modules/test_aio/test_aio.c        | 177 ++++++++++++++-
 3 files changed, 415 insertions(+), 8 deletions(-)

diff --git a/src/test/modules/test_aio/t/001_aio.pl b/src/test/modules/test_aio/t/001_aio.pl
index e18b2a2b8ae..bde92eeb7d4 100644
--- a/src/test/modules/test_aio/t/001_aio.pl
+++ b/src/test/modules/test_aio/t/001_aio.pl
@@ -1420,6 +1420,244 @@ SELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, zero_on_error=>true);),
 }
 
 
+# Tests for StartReadBuffers()
+sub test_read_buffers
+{
+	my $io_method = shift;
+	my $node = shift;
+	my ($ret, $output);
+	my $table;
+	my $persistency;
+
+	my $psql_a = $node->background_psql('postgres', on_error_stop => 0);
+	my $psql_b = $node->background_psql('postgres', on_error_stop => 0);
+
+	$psql_a->query_safe(
+		qq(
+CREATE TEMPORARY TABLE tmp_ok(data int not null);
+INSERT INTO tmp_ok SELECT generate_series(1, 5000);
+));
+
+	foreach $persistency (qw(normal temporary))
+	{
+		$table = $persistency eq 'normal' ? 'tbl_ok' : 'tmp_ok';
+
+		# check that one larger read is done as multiple reads
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, combine, block 0-1",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 2)|,
+			qr/^0\|0\|t\|2$/,
+			qr/^$/);
+
+		# but if we do it again, i.e. it's in s_b, there will be two
+		# operations
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, doesn't combine hits, block 0-1",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 2)|,
+			qr/^0\|0\|f\|1\n1\|1\|f\|1$/,
+			qr/^$/);
+
+		# Check that a larger read interrupted by a hit works
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, prep, block 3",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 3, 1)|,
+			qr/^0\|3\|t\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, interrupted by hit on 3, block 2-5",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 2, 4)|,
+			qr/^0\|2\|t\|1\n1\|3\|f\|1\n2\|4\|t\|2$/,
+			qr/^$/);
+
+
+		# Verify that a read with an initial buffer hit works
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, miss, block 0",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 1)|,
+			qr/^0\|0\|t\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, hit, block 0",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 1)|,
+			qr/^0\|0\|f\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, miss, block 1",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 1)|,
+			qr/^0\|1\|t\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, hit, block 1",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 1)|,
+			qr/^0\|1\|f\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, hit, block 0-1",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 2)|,
+			qr/^0\|0\|f\|1\n1\|1\|f\|1$/,
+			qr/^$/);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, hit 0-1, miss 2",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 3)|,
+			qr/^0\|0\|f\|1\n1\|1\|f\|1\n2\|2\|t\|1$/,
+			qr/^$/);
+
+		# Verify that a read with a initial miss and trailing buffer hit(s) works
+		$psql_a->query_safe(qq|SELECT invalidate_rel_block('$table', 0)|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, miss 0, hit 1-2",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 0, 3)|,
+			qr/^0\|0\|t\|1\n1\|1\|f\|1\n2\|2\|f\|1$/,
+			qr/^$/);
+		$psql_a->query_safe(qq|SELECT invalidate_rel_block('$table', 1)|);
+		$psql_a->query_safe(qq|SELECT invalidate_rel_block('$table', 2)|);
+		$psql_a->query_safe(qq|SELECT * FROM read_buffers('$table', 3, 2)|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, miss 1-2, hit 3-4",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 4)|,
+			qr/^0\|1\|t\|2\n2\|3\|f\|1\n3\|4\|f\|1$/,
+			qr/^$/);
+
+		# Verify that we aren't doing reads larger than io_combine_limit
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		$psql_a->query_safe(qq|SET io_combine_limit=3|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, io_combine_limit has effect",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 5)|,
+			qr/^0\|1\|t\|3\n3\|4\|t\|2$/,
+			qr/^$/);
+		$psql_a->query_safe(qq|RESET io_combine_limit|);
+
+
+		$psql_a->query_safe(qq|SELECT invalidate_rel_block('$table', 1)|);
+		$psql_a->query_safe(qq|SELECT invalidate_rel_block('$table', 2)|);
+		$psql_a->query_safe(qq|SELECT * FROM read_buffers('$table', 3, 2)|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, miss 1-2, hit 3-4",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 4)|,
+			qr/^0\|1\|t\|2\n2\|3\|f\|1\n3\|4\|f\|1$/,
+			qr/^$/);
+
+		# Test encountering buffer IO we started, have to accept that IO may
+		# or may not have completed though.
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		$psql_a->query_safe(
+			qq|SELECT read_rel_block_ll('$table', 1, wait_complete=>false)|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, in-progress 1, read 1-3",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 3)|,
+			qr/^0\|1\|(t|f)\|1\n1\|2\|t\|2$/,
+			qr/^$/);
+
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		$psql_a->query_safe(
+			qq|SELECT read_rel_block_ll('$table', 2, wait_complete=>false)|);
+		psql_like(
+			$io_method,
+			$psql_a,
+			"$persistency: read buffers, in-progress 2, read 1-3",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 3)|,
+			qr/^0\|1\|t\|1\n1\|2\|f\|1\n2\|3\|t\|1$/,
+			qr/^$/);
+	}
+
+	# The remaining tests don't make sense for temp tables, as they are
+	# concerned with multiple sessions interacting with each other.
+	$table = 'tbl_ok';
+	$persistency = 'normal';
+
+	# Test start buffer IO will split IO if there's IO in progress. We can't
+	# observe this with sync, as that does not start the IO operation in
+	# StartReadBuffers().
+	if ($io_method ne 'sync')
+	{
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+
+		my $buf_id =
+		  $psql_b->query_safe(qq|SELECT buffer_create_toy('tbl_ok', 3)|);
+		$psql_b->query_safe(
+			qq|SELECT buffer_call_start_io($buf_id, for_input=>true, nowait=>false)|
+		);
+
+		query_wait_block(
+			$io_method,
+			$node,
+			$psql_a,
+			"$persistency: read buffers blocks waiting for concurrent IO",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 5);\n|,
+			"BufferIo");
+		$psql_b->query_safe(
+			qq|SELECT buffer_call_terminate_io($buf_id, for_input=>true, succeed=>false, io_error=>false, release_aio=>false)|
+		);
+		pump_until(
+			$psql_a->{run}, $psql_a->{timeout},
+			\$psql_a->{stdout}, qr/0\|1\|t\|2\n2\|3\|t\|3/);
+		ok(1,
+			"$io_method: $persistency: IO was split due to concurrent failed IO"
+		);
+
+		# Same as before, except the concurrent IO succeeds this time
+		$psql_a->query_safe(qq|SELECT evict_rel('$table')|);
+		$buf_id =
+		  $psql_b->query_safe(qq|SELECT buffer_create_toy('tbl_ok', 3)|);
+		$psql_b->query_safe(
+			qq|SELECT buffer_call_start_io($buf_id, for_input=>true, nowait=>false)|
+		);
+
+		query_wait_block(
+			$io_method,
+			$node,
+			$psql_a,
+			"$persistency: read buffers blocks waiting for concurrent IO",
+			qq|SELECT blockoff, blocknum, needs_io, nblocks FROM read_buffers('$table', 1, 5);\n|,
+			"BufferIo");
+		$psql_b->query_safe(
+			qq|SELECT buffer_call_terminate_io($buf_id, for_input=>true, succeed=>true, io_error=>false, release_aio=>false)|
+		);
+		pump_until($psql_a->{run}, $psql_a->{timeout}, \$psql_a->{stdout},
+			qr/0\|1\|t\|2\n2\|3\|f\|1\n3\|4\|t\|2/);
+		ok(1,
+			"$io_method: $persistency: IO was split due to concurrent successful IO"
+		);
+	}
+
+	$psql_a->quit();
+	$psql_b->quit();
+}
+
+
 # Run all tests that for the specified node / io_method
 sub test_io_method
 {
@@ -1455,6 +1693,7 @@ CHECKPOINT;
 	test_checksum($io_method, $node);
 	test_ignore_checksum($io_method, $node);
 	test_checksum_createdb($io_method, $node);
+	test_read_buffers($io_method, $node);
 
 	# generic injection tests
   SKIP:
diff --git a/src/test/modules/test_aio/test_aio--1.0.sql b/src/test/modules/test_aio/test_aio--1.0.sql
index e495481c41e..c0ac47339a9 100644
--- a/src/test/modules/test_aio/test_aio--1.0.sql
+++ b/src/test/modules/test_aio/test_aio--1.0.sql
@@ -33,6 +33,10 @@ CREATE FUNCTION read_rel_block_ll(
 RETURNS pg_catalog.void STRICT
 AS 'MODULE_PATHNAME' LANGUAGE C;
 
+CREATE FUNCTION evict_rel(rel regclass)
+RETURNS pg_catalog.void STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;
+
 CREATE FUNCTION invalidate_rel_block(rel regclass, blockno int)
 RETURNS pg_catalog.void STRICT
 AS 'MODULE_PATHNAME' LANGUAGE C;
@@ -49,6 +53,9 @@ CREATE FUNCTION buffer_call_terminate_io(buffer int, for_input bool, succeed boo
 RETURNS pg_catalog.void STRICT
 AS 'MODULE_PATHNAME' LANGUAGE C;
 
+CREATE FUNCTION read_buffers(rel regclass, startblock int4, nblocks int4, OUT blockoff int4, OUT blocknum int4, OUT needs_io bool, OUT nblocks int4, OUT buf int4[])
+RETURNS SETOF record STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;
 
 
 /*
diff --git a/src/test/modules/test_aio/test_aio.c b/src/test/modules/test_aio/test_aio.c
index b1aa8af9ec0..43f32039960 100644
--- a/src/test/modules/test_aio/test_aio.c
+++ b/src/test/modules/test_aio/test_aio.c
@@ -19,7 +19,9 @@
 #include "postgres.h"
 
 #include "access/relation.h"
+#include "catalog/pg_type.h"
 #include "fmgr.h"
+#include "funcapi.h"
 #include "storage/aio.h"
 #include "storage/aio_internal.h"
 #include "storage/buf_internals.h"
@@ -27,9 +29,11 @@
 #include "storage/checksum.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
+#include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/injection_point.h"
 #include "utils/rel.h"
+#include "utils/tuplestore.h"
 
 
 PG_MODULE_MAGIC;
@@ -458,18 +462,13 @@ read_rel_block_ll(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
-PG_FUNCTION_INFO_V1(invalidate_rel_block);
-Datum
-invalidate_rel_block(PG_FUNCTION_ARGS)
+/* helper for invalidate_rel_block() and evict_rel() */
+static void
+invalidate_one_block(Relation rel, ForkNumber forknum, BlockNumber blkno)
 {
-	Oid			relid = PG_GETARG_OID(0);
-	BlockNumber blkno = PG_GETARG_UINT32(1);
-	Relation	rel;
 	PrefetchBufferResult pr;
 	Buffer		buf;
 
-	rel = relation_open(relid, AccessExclusiveLock);
-
 	/*
 	 * This is a gross hack, but there's no other API exposed that allows to
 	 * get a buffer ID without actually reading the block in.
@@ -506,11 +505,74 @@ invalidate_rel_block(PG_FUNCTION_ARGS)
 		}
 	}
 
+}
+
+PG_FUNCTION_INFO_V1(invalidate_rel_block);
+Datum
+invalidate_rel_block(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	BlockNumber blkno = PG_GETARG_UINT32(1);
+	Relation	rel;
+
+	rel = relation_open(relid, AccessExclusiveLock);
+
+	invalidate_one_block(rel, MAIN_FORKNUM, blkno);
+
 	relation_close(rel, AccessExclusiveLock);
 
 	PG_RETURN_VOID();
 }
 
+PG_FUNCTION_INFO_V1(evict_rel);
+Datum
+evict_rel(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	Relation	rel;
+
+	rel = relation_open(relid, AccessExclusiveLock);
+
+	/*
+	 * EvictRelUnpinnedBuffers() doesn't support temp tables, so for temp
+	 * tables we have to do it the expensive way and evict every possible
+	 * buffer.
+	 */
+	if (RelationUsesLocalBuffers(rel))
+	{
+		SMgrRelation smgr = RelationGetSmgr(rel);
+
+		for (int forknum = MAIN_FORKNUM; forknum <= MAX_FORKNUM; forknum++)
+		{
+			BlockNumber nblocks;
+
+			if (!smgrexists(smgr, forknum))
+				continue;
+
+			nblocks = smgrnblocks(smgr, forknum);
+
+			for (int blkno = 0; blkno < nblocks; blkno++)
+			{
+				invalidate_one_block(rel, forknum, blkno);
+			}
+		}
+	}
+	else
+	{
+		int32		buffers_evicted,
+					buffers_flushed,
+					buffers_skipped;
+
+		EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
+								&buffers_skipped);
+	}
+
+	relation_close(rel, AccessExclusiveLock);
+
+
+	PG_RETURN_VOID();
+}
+
 PG_FUNCTION_INFO_V1(buffer_create_toy);
 Datum
 buffer_create_toy(PG_FUNCTION_ARGS)
@@ -610,6 +672,105 @@ buffer_call_terminate_io(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
+PG_FUNCTION_INFO_V1(read_buffers);
+/*
+ * Infrastructure to test StartReadBuffers()
+ */
+Datum
+read_buffers(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	BlockNumber startblock = PG_GETARG_UINT32(1);
+	int32		nblocks = PG_GETARG_INT32(2);
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	Relation	rel;
+	SMgrRelation smgr;
+	uint16		nblocks_done = 0;
+	uint16		nios = 0;
+	ReadBuffersOperation *operations;
+	Buffer	   *buffers;
+	Datum	   *buffers_datum;
+
+	Assert(nblocks > 0);
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* at worst the each block gets its own IO */
+	operations = palloc0(sizeof(ReadBuffersOperation) * nblocks);
+	buffers = palloc0(sizeof(Buffer) * nblocks);
+	buffers_datum = palloc0(sizeof(Datum) * nblocks);
+
+	rel = relation_open(relid, AccessShareLock);
+	smgr = RelationGetSmgr(rel);
+
+	while (nblocks_done < nblocks)
+	{
+		ReadBuffersOperation *operation = &operations[nios];
+		int			nblocks_this_io =
+			Min(nblocks - nblocks_done, io_combine_limit);
+		Datum		values[5] = {0};
+		bool		nulls[5] = {0};
+		bool		needed_io;
+		ArrayType  *buffers_arr;
+
+		operation->rel = rel;
+		operation->smgr = smgr;
+		operation->persistence = rel->rd_rel->relpersistence;
+		operation->strategy = NULL;
+
+		needed_io = StartReadBuffers(operation,
+									 &buffers[nblocks_done],
+									 startblock + nblocks_done,
+									 &nblocks_this_io,
+									 0);
+		if (needed_io)
+			WaitReadBuffers(operation);
+
+		/* convert buffer array to datum array */
+		for (int i = 0; i < nblocks_this_io; i++)
+			buffers_datum[nblocks_done + i] = Int32GetDatum(buffers[nblocks_done + i]);
+		buffers_arr = construct_array_builtin(&buffers_datum[nblocks_done],
+											  nblocks_this_io,
+											  INT4OID);
+
+		values[0] = Int32GetDatum(nblocks_done);
+		nulls[0] = false;
+
+		values[1] = UInt32GetDatum(startblock + nblocks_done);
+		nulls[1] = false;
+
+		values[2] = BoolGetDatum(needed_io);
+		nulls[2] = false;
+
+		values[3] = Int32GetDatum((int16) nblocks_this_io);
+		nulls[3] = false;
+
+		values[4] = PointerGetDatum(buffers_arr);
+		nulls[4] = false;
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+
+		for (int i = 0; i < nblocks_this_io; i++)
+			ReleaseBuffer(buffers[nblocks_done + i]);
+
+		nios++;
+		nblocks_done += nblocks_this_io;
+	}
+
+	/*
+	 * Free explicitly, to have a chance to detect potential issues with too
+	 * long lived references to the operation.
+	 */
+	pfree(operations);
+	pfree(buffers);
+	pfree(buffers_datum);
+
+	relation_close(rel, NoLock);
+
+	return (Datum) 0;
+}
+
+
 PG_FUNCTION_INFO_V1(handle_get);
 Datum
 handle_get(PG_FUNCTION_ARGS)
-- 
2.53.0.1.gb2826b52eb

