From 777b5265c296b09dd842e5b93f617bafd0f00a93 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 13 Feb 2023 17:11:29 -0800
Subject: [PATCH v4 01/15] Revise pg_pwrite_zeros()

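- add an offset parameter, so the zeroes can be written starting at any
  file position instead of always at offset 0 (existing callers pass 0)
- avoid the memset() of the zero buffer on every call by making the
  buffer static const
- fill in only the iovec entries each write actually uses, instead of
  initializing the whole IOV array up front
- fold the smaller trailing write into the last vectored write instead
  of handling it in a separate write call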
---
 src/include/common/file_utils.h    |  2 +-
 src/common/file_utils.c            | 62 ++++++++----------------------
 src/backend/access/transam/xlog.c  |  2 +-
 src/bin/pg_basebackup/walmethods.c |  2 +-
 4 files changed, 20 insertions(+), 48 deletions(-)

diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index bda6d3a5413..b7efa1226d6 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -44,6 +44,6 @@ extern ssize_t pg_pwritev_with_retry(int fd,
 									 int iovcnt,
 									 off_t offset);
 
-extern ssize_t pg_pwrite_zeros(int fd, size_t size);
+extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
 
 #endif							/* FILE_UTILS_H */
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 4dae534152f..93b5d42c5d1 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -537,62 +537,34 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
  * is returned with errno set.
  */
 ssize_t
-pg_pwrite_zeros(int fd, size_t size)
+pg_pwrite_zeros(int fd, size_t size, off_t offset)
 {
-	PGAlignedBlock zbuffer;		/* worth BLCKSZ */
-	size_t		zbuffer_sz;
+	static const PGAlignedBlock zbuffer = {0};	/* worth BLCKSZ */
+	void	   *zerobuf_addr = unconstify(PGAlignedBlock *, &zbuffer)->data;
 	struct iovec iov[PG_IOV_MAX];
-	int			blocks;
-	size_t		remaining_size = 0;
-	int			i;
-	ssize_t		written;
+	size_t		remaining_size = size;
 	ssize_t		total_written = 0;
 
-	zbuffer_sz = sizeof(zbuffer.data);
-
-	/* Zero-fill the buffer. */
-	memset(zbuffer.data, 0, zbuffer_sz);
-
-	/* Prepare to write out a lot of copies of our zero buffer at once. */
-	for (i = 0; i < lengthof(iov); ++i)
-	{
-		iov[i].iov_base = zbuffer.data;
-		iov[i].iov_len = zbuffer_sz;
-	}
-
 	/* Loop, writing as many blocks as we can for each system call. */
-	blocks = size / zbuffer_sz;
-	remaining_size = size % zbuffer_sz;
-	for (i = 0; i < blocks;)
+	while (remaining_size > 0)
 	{
-		int			iovcnt = Min(blocks - i, lengthof(iov));
-		off_t		offset = i * zbuffer_sz;
+		int			iovcnt = 0;
+		ssize_t		written;
+
+		for (; iovcnt < PG_IOV_MAX && remaining_size > 0; iovcnt++)
+		{
+			size_t		this_iov_size = Min(remaining_size, BLCKSZ);
+
+			iov[iovcnt].iov_base = zerobuf_addr;
+			iov[iovcnt].iov_len = this_iov_size;
+			remaining_size -= this_iov_size;
+		}
 
 		written = pg_pwritev_with_retry(fd, iov, iovcnt, offset);
-
-		if (written < 0)
-			return written;
-
-		i += iovcnt;
-		total_written += written;
-	}
-
-	/* Now, write the remaining size, if any, of the file with zeros. */
-	if (remaining_size > 0)
-	{
-		/* We'll never write more than one block here */
-		int			iovcnt = 1;
-
-		/* Jump on to the end of previously written blocks */
-		off_t		offset = i * zbuffer_sz;
-
-		iov[0].iov_len = remaining_size;
-
-		written = pg_pwritev_with_retry(fd, iov, iovcnt, offset);
-
 		if (written < 0)
 			return written;
 
+		offset += written;
 		total_written += written;
 	}
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d1..786b26054cf 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2981,7 +2981,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 		 * indirect blocks are down on disk.  Therefore, fdatasync(2) or
 		 * O_DSYNC will be sufficient to sync future writes to the log file.
 		 */
-		rc = pg_pwrite_zeros(fd, wal_segment_size);
+		rc = pg_pwrite_zeros(fd, wal_segment_size, 0);
 
 		if (rc < 0)
 			save_errno = errno;
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index 54014e2b84d..6d14b988cb6 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -222,7 +222,7 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
 	{
 		ssize_t		rc;
 
-		rc = pg_pwrite_zeros(fd, pad_to_size);
+		rc = pg_pwrite_zeros(fd, pad_to_size, 0);
 
 		if (rc < 0)
 		{
-- 
2.38.0

