From 7d58cc85191c96d8dc731b62810b64c5b366743b Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sat, 31 Aug 2024 22:10:35 -0400
Subject: [PATCH v2.0 05/17] bufmgr/smgr: Don't cross segment boundaries in
 StartReadBuffers()

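With real AIO it doesn't make sense to cross segment boundaries with one
IO. Add smgrmaxcombine() to allow upper layers to query how many blocks,
starting at a given block, can be combined into a single IO, and use it in
StartReadBuffers() to limit the size of a read accordingly. mdreadv() and
mdwritev() now error out on requests that cross a segment boundary.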
---
 src/include/storage/md.h            |  2 ++
 src/include/storage/smgr.h          |  2 ++
 src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++
 src/backend/storage/smgr/md.c       | 17 +++++++++++++++++
 src/backend/storage/smgr/smgr.c     | 16 ++++++++++++++++
 5 files changed, 55 insertions(+)

diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index 620f10abdeb..b72293c79a5 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -32,6 +32,8 @@ extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, int nblocks, bool skipFsync);
 extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum, int nblocks);
+extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
+						   BlockNumber blocknum);
 extern void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 					void **buffers, BlockNumber nblocks);
 extern void mdwritev(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index e15b20a566a..899d0d681c5 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -92,6 +92,8 @@ extern void smgrzeroextend(SMgrRelation reln, ForkNumber forknum,
 						   BlockNumber blocknum, int nblocks, bool skipFsync);
 extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, int nblocks);
+extern uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
+							 BlockNumber blocknum);
 extern void smgrreadv(SMgrRelation reln, ForkNumber forknum,
 					  BlockNumber blocknum,
 					  void **buffers, BlockNumber nblocks);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index ec957635f2a..f2e608f597d 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1286,6 +1286,7 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
 {
 	int			actual_nblocks = *nblocks;
 	int			io_buffers_len = 0;
+	int			maxcombine = 0;
 
 	Assert(*nblocks > 0);
 	Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
@@ -1317,6 +1318,23 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
 		{
 			/* Extend the readable range to cover this block. */
 			io_buffers_len++;
+
+			/*
+			 * Check how many blocks we can cover with the same IO. The smgr
+			 * implementation might e.g. be limited due to a segment boundary.
+			 * Only done for the first block, and only if combining at all.
+			 */
+			if (i == 0 && actual_nblocks > 1)
+			{
+				maxcombine = smgrmaxcombine(operation->smgr,
+											operation->forknum, blockNum);
+				if (maxcombine < actual_nblocks)
+				{
+					elog(DEBUG2, "limiting nblocks at %u from %d to %d",
+						 blockNum, actual_nblocks, maxcombine);
+					actual_nblocks = maxcombine;
+				}
+			}
 		}
 	}
 	*nblocks = actual_nblocks;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 6796756358f..6cd81a61faa 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -803,6 +803,17 @@ buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
 	return iovcnt;
 }
 
+/* mdmaxcombine() -- Blocks from blocknum through the end of its segment. */
+uint32
+mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
+			 BlockNumber blocknum)
+{
+	/* Offset of blocknum inside its segment; reln and forknum are unused. */
+	BlockNumber blocks_into_seg = blocknum % ((BlockNumber) RELSEG_SIZE);
+
+	return RELSEG_SIZE - blocks_into_seg;
+}
+
 /*
  * mdreadv() -- Read the specified blocks from a relation.
  */
@@ -833,6 +844,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
 		nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
 
+		if (nblocks_this_segment != nblocks)
+			elog(ERROR, "read crossing segment boundary");
+
 		iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
 		size_this_segment = nblocks_this_segment * BLCKSZ;
 		transferred_this_segment = 0;
@@ -956,6 +970,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
 		nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
 
+		if (nblocks_this_segment != nblocks)
+			elog(ERROR, "write crossing segment boundary");
+
 		iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
 		size_this_segment = nblocks_this_segment * BLCKSZ;
 		transferred_this_segment = 0;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 7b9fa103eff..ee31db85eec 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -88,6 +88,8 @@ typedef struct f_smgr
 									BlockNumber blocknum, int nblocks, bool skipFsync);
 	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
 								  BlockNumber blocknum, int nblocks);
+	uint32		(*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum,
+									BlockNumber blocknum);
 	void		(*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
 							   BlockNumber blocknum,
 							   void **buffers, BlockNumber nblocks);
@@ -117,6 +119,7 @@ static const f_smgr smgrsw[] = {
 		.smgr_extend = mdextend,
 		.smgr_zeroextend = mdzeroextend,
 		.smgr_prefetch = mdprefetch,
+		.smgr_maxcombine = mdmaxcombine,
 		.smgr_readv = mdreadv,
 		.smgr_writev = mdwritev,
 		.smgr_writeback = mdwriteback,
@@ -588,6 +591,19 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
 }
 
+/*
+ * smgrmaxcombine() -- Report how many blocks a single IO that starts at
+ *				 blocknum may cover; blocknum itself is counted.
+ */
+uint32
+smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
+			   BlockNumber blocknum)
+{
+	const f_smgr *impl = &smgrsw[reln->smgr_which];
+
+	return impl->smgr_maxcombine(reln, forknum, blocknum);
+}
+
 /*
  * smgrreadv() -- read a particular block range from a relation into the
  *				 supplied buffers.
-- 
2.45.2.827.g557ae147e6

