From 7808d7241088f8dc02d7d828dc4ee0227e8e2a01 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 19 Jul 2023 12:32:51 +1200
Subject: [PATCH v3 1/7] Provide vectored variants of FileRead() and
 FileWrite().

FileReadV() and FileWriteV() adapt preadv() and pwritev() for
PostgreSQL's virtual file descriptors.  The traditional FileRead() and
FileWrite() functions are implemented in terms of the new functions.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
---
 src/backend/storage/file/fd.c | 33 +++++++++++++++++++++++----------
 src/include/storage/fd.h      | 30 ++++++++++++++++++++++++++++--
 2 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f691ba0932..aa89c755b3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2110,8 +2110,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 }
 
 int
-FileRead(File file, void *buffer, size_t amount, off_t offset,
-		 uint32 wait_event_info)
+FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+		  uint32 wait_event_info)
 {
 	int			returnCode;
 	Vfd		   *vfdP;
@@ -2131,7 +2131,7 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
 
 retry:
 	pgstat_report_wait_start(wait_event_info);
-	returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
+	returnCode = pg_preadv(vfdP->fd, iov, iovcnt, offset);
 	pgstat_report_wait_end();
 
 	if (returnCode < 0)
@@ -2166,8 +2166,8 @@ retry:
 }
 
 int
-FileWrite(File file, const void *buffer, size_t amount, off_t offset,
-		  uint32 wait_event_info)
+FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+		   uint32 wait_event_info)
 {
 	int			returnCode;
 	Vfd		   *vfdP;
@@ -2195,7 +2195,14 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
 	 */
 	if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
 	{
-		off_t		past_write = offset + amount;
+		size_t		size = 0;
+		off_t		past_write;
+
+		/* Compute the total transfer size. */
+		for (int i = 0; i < iovcnt; ++i)
+			size += iov[i].iov_len;
+
+		past_write = offset + size;
 
 		if (past_write > vfdP->fileSize)
 		{
@@ -2213,11 +2220,17 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
 retry:
 	errno = 0;
 	pgstat_report_wait_start(wait_event_info);
-	returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
+	returnCode = pg_pwritev(vfdP->fd, iov, iovcnt, offset);
 	pgstat_report_wait_end();
 
-	/* if write didn't set errno, assume problem is no disk space */
-	if (returnCode != amount && errno == 0)
+	/*
+	 * Some callers expect short writes to set errno, even though that isn't
+	 * the behavior of the underlying system calls.  We don't want to waste
+	 * CPU cycles adding up the total write size here, so we'll just set it to
+	 * ENOSPC even on success, if the syscall didn't set it.  On successful
+	 * non-short write, no caller should consult errno.
+	 */
+	if (errno == 0)
 		errno = ENOSPC;
 
 	if (returnCode >= 0)
@@ -2227,7 +2240,7 @@ retry:
 		 */
 		if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
 		{
-			off_t		past_write = offset + amount;
+			off_t		past_write = offset + returnCode;
 
 			if (past_write > vfdP->fileSize)
 			{
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index d9d5d9da5f..a1d6056d9d 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -43,6 +43,8 @@
 #ifndef FD_H
 #define FD_H
 
+#include "port/pg_iovec.h"
+
 #include <dirent.h>
 #include <fcntl.h>
 
@@ -105,8 +107,8 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
 extern File OpenTemporaryFile(bool interXact);
 extern void FileClose(File file);
 extern int	FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info);
-extern int	FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
-extern int	FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
+extern int	FileReadV(File file, const struct iovec *ioc, int iovcnt, off_t offset, uint32 wait_event_info);
+extern int	FileWriteV(File file, const struct iovec *ioc, int iovcnt, off_t offset, uint32 wait_event_info);
 extern int	FileSync(File file, uint32 wait_event_info);
 extern int	FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info);
 extern int	FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info);
@@ -189,4 +191,28 @@ extern int	durable_unlink(const char *fname, int elevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+static inline int
+FileRead(File file, void *buffer, size_t amount, off_t offset,
+		 uint32 wait_event_info)
+{
+	struct iovec iov = {
+		.iov_base = buffer,
+		.iov_len = amount
+	};
+
+	return FileReadV(file, &iov, 1, offset, wait_event_info);
+}
+
+static inline int
+FileWrite(File file, const void *buffer, size_t amount, off_t offset,
+		  uint32 wait_event_info)
+{
+	struct iovec iov = {
+		.iov_base = unconstify(void *, buffer),
+		.iov_len = amount
+	};
+
+	return FileWriteV(file, &iov, 1, offset, wait_event_info);
+}
+
 #endif							/* FD_H */
-- 
2.42.1

