From b54e6c7b05d9bf34bde60b72f3a8d1989b2b17dc Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sun, 5 Apr 2026 03:07:19 +1200
Subject: [PATCH 3/4] fixup: Use more efficient zero-copy API?

We can pass a pointer to data in libarchive's internal buffer directly
to the next streamer, avoiding one copy.  To do this we also have to
expand any sparse regions ourselves.

XXX Not sure this is worth the complexity for code that is not
performance-critical?
---
 src/fe_utils/astreamer_libarchive.c | 63 ++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/fe_utils/astreamer_libarchive.c b/src/fe_utils/astreamer_libarchive.c
index d57853171f4..d28c4ed915f 100644
--- a/src/fe_utils/astreamer_libarchive.c
+++ b/src/fe_utils/astreamer_libarchive.c
@@ -29,7 +29,8 @@ typedef struct astreamer_libarchive_reader
 	struct archive *archive;
 	bool		end_of_file;
 	bool		end_of_archive;
-	char		data[ASTREAMER_LIBARCHIVE_READER_BUFFER_SIZE];
+	pgoff_t		offset;
+	char		zeroes[8192];
 } astreamer_libarchive_reader;
 
 static void astreamer_libarchive_reader_content(astreamer *streamer,
@@ -121,6 +122,27 @@ astreamer_libarchive_reader_fill_member(astreamer_member *member,
 	}
 }
 
+/*
+ * Send chunks of mystreamer->zeroes (assumed zero-filled) to the next
+ * streamer until mystreamer->offset reaches 'offset'.  Always returns true.
+ */
+static bool
+astreamer_libarchive_reader_expand_sparse(astreamer_libarchive_reader *mystreamer,
+										  pgoff_t offset)
+{
+	while (mystreamer->offset < offset)
+	{
+		/* Clamp as pgoff_t: a 32-bit size_t could truncate the gap to zero. */
+		pgoff_t		gap = offset - mystreamer->offset;
+		size_t		size = (size_t) Min(gap, (pgoff_t) sizeof(mystreamer->zeroes));
+
+		astreamer_content(mystreamer->base.bbs_next, &mystreamer->member,
+						  mystreamer->zeroes, size, ASTREAMER_MEMBER_CONTENTS);
+		mystreamer->offset += size;
+	}
+	return true;
+}
+
 static void
 astreamer_libarchive_reader_content(astreamer *streamer,
 									astreamer_member *member,
@@ -129,7 +151,9 @@ astreamer_libarchive_reader_content(astreamer *streamer,
 									astreamer_archive_context context)
 {
 	astreamer_libarchive_reader *mystreamer;
-	ssize_t		size;
+	const void *data;
+	size_t		size;
+	pgoff_t		offset;
 
 	/*
 	 * This should be reached by calling astreamer_pull().
@@ -174,6 +198,7 @@ astreamer_libarchive_reader_content(astreamer *streamer,
 				case ARCHIVE_OK:
 					/* Send file header, then fall through to send one chunk. */
 					mystreamer->end_of_file = false;
+					mystreamer->offset = 0;
 					astreamer_libarchive_reader_fill_member(&mystreamer->member,
 															entry);
 					astreamer_content(mystreamer->base.bbs_next,
@@ -197,12 +222,19 @@ astreamer_libarchive_reader_content(astreamer *streamer,
 			}
 		}
 
-		/* Stream a chunk of data, or discover end of file. */
+		/*
+		 * Stream a chunk of data, or discover end of file.
+		 *
+		 * It would be a bit simpler to use archive_read_data(), but this
+		 * interface removes the need for copying to an output buffer.  In
+		 * exchange for that, we have to deal with expanding (rare) sparse
+		 * file zeroes.
+		 */
 		Assert(!mystreamer->end_of_file);
-		size = archive_read_data(mystreamer->archive,
-								 mystreamer->data,
-								 sizeof(mystreamer->data));
-		switch (size)
+		switch (archive_read_data_block(mystreamer->archive,
+										&data,
+										&size,
+										&offset))
 		{
 			case ARCHIVE_RETRY:
 				continue;
@@ -213,11 +245,20 @@ astreamer_libarchive_reader_content(astreamer *streamer,
 			case ARCHIVE_WARN:
 				pg_log_warning("libarchive: %s",
 							   archive_error_string(mystreamer->archive));
-				continue;
+				break;
+			case ARCHIVE_EOF:
+				size = 0;
+				break;
+			case ARCHIVE_OK:
+				break;
 			default:
+				pg_fatal("unexpected result from archive_read_data_block()");
 				break;
 		}
 
+		/* Expand any intervening sparse region. */
+		astreamer_libarchive_reader_expand_sparse(mystreamer, offset);
+
 		if (size == 0)
 		{
 			/* Send trailer, and go around to start another file. */
@@ -230,12 +271,14 @@ astreamer_libarchive_reader_content(astreamer *streamer,
 			continue;
 		}
 
-		/* Stream large chunk and return. */
+		/* Stream large chunk directly from libarchive's buffer and return. */
+		Assert(mystreamer->offset == offset);
 		astreamer_content(mystreamer->base.bbs_next,
 						  &mystreamer->member,
-						  mystreamer->data,
+						  data,
 						  size,
 						  ASTREAMER_MEMBER_CONTENTS);
+		mystreamer->offset += size;
 		return;
 	}
 }
-- 
2.53.0

