From ed106edbfd2eeeadec74289afb8d8bfe8f37fff1 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilman1@gmail.com>
Date: Wed, 20 May 2020 22:49:28 -0400
Subject: [PATCH 3/4] Scan all TOCs when restoring a custom dump file without
 offsets

TOC requests are not guaranteed to come in disk order. If the custom
dump file was written with data offsets, pg_restore can seek directly to
the data, making request order irrelevant. If there are no data offsets,
pg_restore would never attempt to seek backwards, even when that was
possible, and so would not find TOCs located before the current read
position in the file. Commit 548e50976 changed how pg_restore's parallel
algorithm works, at the cost of greatly increasing out-of-order TOC
requests.

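This patch changes pg_restore to scan through all TOCs to service a TOC
read request when restoring a custom dump file without data offsets: if
the requested entry is not found before end of file and the input file
is seekable, pg_restore seeks back to the start of the data (dataStart)
and makes one more pass over the file. The odds of getting a successful
parallel restore go way up, at the cost of a bunch of extra tiny reads
when pg_restore starts up.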

The pg_restore manpage now warns against running pg_dump with an
unseekable output file and suggests that, if you plan on doing a
parallel restore of a custom dump, you should run pg_dump with --file.
---
 doc/src/sgml/ref/pg_restore.sgml   |  8 ++++++++
 src/bin/pg_dump/pg_backup_custom.c | 24 ++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 232f88024f..a0bf64767b 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -279,6 +279,14 @@ PostgreSQL documentation
         jobs cannot be used together with the
         option <option>--single-transaction</option>.
        </para>
+
+       <para>
+        <application>pg_restore</application> with concurrent jobs may fail
+        when restoring a custom archive format dump written to an unseekable
+        output stream, such as stdout. For the best performance when restoring
+        a custom archive format dump, use <application>pg_dump</application>'s
+        <option>--file</option> option to specify an output file.
+       </para>
       </listitem>
      </varlistentry>
 
diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index 8c84e3c611..851add4915 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -415,6 +415,7 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
 	int			blkType;
 	int			id;
+	bool		initialScan = true;
 
 	if (tctx->dataState == K_OFFSET_NO_DATA)
 		return;
@@ -423,11 +424,19 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	{
 		/*
 		 * We cannot seek directly to the desired block.  Instead, skip over
-		 * block headers until we find the one we want.  This could fail if we
-		 * are asked to restore items out-of-order.
+		 * block headers until we find the one we want.
 		 */
+
 		_readBlockHeader(AH, &blkType, &id);
 
+		if (blkType == EOF && ctx->hasSeek) {
+			/* Started at the end of the file */
+			initialScan = false;
+			if (fseeko(AH->FH, ctx->dataStart, SEEK_SET) != 0)
+				fatal("error during file seek: %m");
+			_readBlockHeader(AH, &blkType, &id);
+		}
+
 		while (blkType != EOF && id != te->dumpId)
 		{
 			switch (blkType)
@@ -446,6 +455,17 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 					break;
 			}
 			_readBlockHeader(AH, &blkType, &id);
+
+			if (blkType == EOF && ctx->hasSeek && initialScan) {
+				/*
+				 * This was possibly an out-of-order request.
+				 * Try one extra pass over the file to find it.
+				 */
+				initialScan = false;
+				if (fseeko(AH->FH, ctx->dataStart, SEEK_SET) != 0)
+					fatal("error during file seek: %m");
+				_readBlockHeader(AH, &blkType, &id);
+			}
 		}
 	}
 	else
-- 
2.26.2

