From 2d79ed35fd906dc53fb7ac7695cfec92544dd7ac Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 19 Jul 2023 12:32:51 +1200
Subject: [PATCH v1 01/14] Provide vectored variants of FileRead() and
 FileWrite().

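FileReadV() and FileWriteV() adapt preadv() and pwritev() for
PostgreSQL's virtual file descriptors.  The traditional FileRead() and
FileWrite() functions are implemented in terms of the new functions.

Note one behavior change: FileWriteV() now substitutes ENOSPC only when
the write fails (returns a negative value) without setting errno.
Previously FileWrite() also did so for a short write, so any caller that
reports a short write with %m can no longer rely on errno being set.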

Author: Thomas Munro <thomas.munro@gmail.com>
---
 src/backend/storage/file/fd.c | 39 +++++++++++++++++++++++++++--------
 src/include/storage/fd.h      | 30 +++++++++++++++++++++++++--
 2 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b490a76ba7..ac5b981cce 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2085,8 +2085,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 }
 
 int
-FileRead(File file, void *buffer, size_t amount, off_t offset,
-		 uint32 wait_event_info)
+FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+		  uint32 wait_event_info)
 {
 	int			returnCode;
 	Vfd		   *vfdP;
@@ -2106,7 +2106,14 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
 
 retry:
 	pgstat_report_wait_start(wait_event_info);
-	returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
+	/* Avoid a slightly more expensive kernel call if there is no benefit. */
+	if (iovcnt == 1)
+		returnCode = pg_pread(vfdP->fd,
+							  iov[0].iov_base,
+							  iov[0].iov_len,
+							  offset);
+	else
+		returnCode = pg_preadv(vfdP->fd, iov, iovcnt, offset);
 	pgstat_report_wait_end();
 
 	if (returnCode < 0)
@@ -2141,8 +2148,8 @@ retry:
 }
 
 int
-FileWrite(File file, const void *buffer, size_t amount, off_t offset,
-		  uint32 wait_event_info)
+FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+		   uint32 wait_event_info)
 {
 	int			returnCode;
 	Vfd		   *vfdP;
@@ -2170,7 +2177,14 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
 	 */
 	if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
 	{
-		off_t		past_write = offset + amount;
+		size_t		size = 0;
+		off_t		past_write;
+
+		/* Compute the total transfer size. */
+		for (int i = 0; i < iovcnt; ++i)
+			size += iov[i].iov_len;
+
+		past_write = offset + size;
 
 		if (past_write > vfdP->fileSize)
 		{
@@ -2188,11 +2202,18 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
 retry:
 	errno = 0;
 	pgstat_report_wait_start(wait_event_info);
-	returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
+	/* Avoid a slightly more expensive kernel call if there is no benefit. */
+	if (iovcnt == 1)
+		returnCode = pg_pwrite(vfdP->fd,
+							   iov[0].iov_base,
+							   iov[0].iov_len,
+							   offset);
+	else
+		returnCode = pg_pwritev(vfdP->fd, iov, iovcnt, offset);
 	pgstat_report_wait_end();
 
 	/* if write didn't set errno, assume problem is no disk space */
-	if (returnCode != amount && errno == 0)
+	if (returnCode < 0 && errno == 0)
 		errno = ENOSPC;
 
 	if (returnCode >= 0)
@@ -2202,7 +2223,7 @@ retry:
 		 */
 		if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
 		{
-			off_t		past_write = offset + amount;
+			off_t		past_write = offset + returnCode;
 
 			if (past_write > vfdP->fileSize)
 			{
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6791a406fc..a354ae5a7f 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,6 +43,8 @@
 #ifndef FD_H
 #define FD_H
 
+#include "port/pg_iovec.h"
+
 #include <dirent.h>
 #include <fcntl.h>
 
@@ -111,8 +113,8 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
 extern File OpenTemporaryFile(bool interXact);
 extern void FileClose(File file);
 extern int	FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info);
-extern int	FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
-extern int	FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
+extern int	FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
+extern int	FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
 extern int	FileSync(File file, uint32 wait_event_info);
 extern int	FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info);
 extern int	FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info);
@@ -195,6 +197,30 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+static inline int
+FileRead(File file, void *buffer, size_t amount, off_t offset,
+		 uint32 wait_event_info)
+{
+	struct iovec single;	/* the caller's buffer as a one-element vector */
+
+	single.iov_base = buffer;
+	single.iov_len = amount;
+
+	return FileReadV(file, &single, 1, offset, wait_event_info);
+}
+
+static inline int
+FileWrite(File file, const void *buffer, size_t amount, off_t offset,
+		  uint32 wait_event_info)
+{
+	struct iovec single;	/* the caller's buffer as a one-element vector */
+
+	single.iov_base = unconstify(void *, buffer);	/* drop const for iov_base */
+	single.iov_len = amount;
+
+	return FileWriteV(file, &single, 1, offset, wait_event_info);
+}
+
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-- 
2.39.2

