From 1493b0bfff3b048c47878a14dff409763763d12f Mon Sep 17 00:00:00 2001
From: Dimitrios Apostolou <jimis@qt.io>
Date: Sat, 12 Apr 2025 01:59:45 +0200
Subject: [PATCH v2] pg_restore --clean --data-only

In parallel restore, it issues a TRUNCATE before COPYing the data into
the tables, within a transaction.

As a result it avoid logging to the WAL, when combined with
wal_level=minimal.
---
 doc/src/sgml/ref/pg_restore.sgml     | 10 ++++++++++
 src/bin/pg_dump/pg_backup.h          |  1 +
 src/bin/pg_dump/pg_backup_archiver.c | 13 +++++++++++--
 src/bin/pg_dump/pg_restore.c         |  8 ++++----
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 2295df62d03..36537857145 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -122,16 +122,26 @@ PostgreSQL documentation
        <para>
         Before restoring database objects, issue commands
         to <command>DROP</command> all the objects that will be restored.
         This option is useful for overwriting an existing database.
         If any of the objects do not exist in the destination database,
         ignorable error messages will be reported,
         unless <option>--if-exists</option> is also specified.
        </para>
+       <para>
+         In combination with <option>--data-only</option> a TRUNCATE will be
+         attempted instead of DROP, before COPYing the data.  So if you want
+         to overwrite an existing database without re-writing the schema, then
+         issue <option>--data-only --clean</option>.  Together
+         with <option>--parallel</option> it is a high performance way to load
+         the tables, as it avoids logging to the WAL (if the server is
+         configured with <option>wal_level=minimal</option>).  Warning:
+         foreign key constraints might cause table truncation to fail.
+       </para>
       </listitem>
      </varlistentry>
 
      <varlistentry>
       <term><option>-C</option></term>
       <term><option>--create</option></term>
       <listitem>
        <para>
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index af0007fb6d2..9428b362c92 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -100,16 +100,17 @@ typedef struct _restoreOptions
 	int			noTableAm;		/* Don't issue table-AM-related commands */
 	int			noTablespace;	/* Don't issue tablespace-related commands */
 	int			disable_triggers;	/* disable triggers during data-only
 									 * restore */
 	int			use_setsessauth;	/* Use SET SESSION AUTHORIZATION commands
 									 * instead of OWNER TO */
 	char	   *superuser;		/* Username to use as superuser */
 	char	   *use_role;		/* Issue SET ROLE to this */
+	int			clean;
 	int			dropSchema;
 	int			disable_dollar_quoting;
 	int			dump_inserts;	/* 0 = COPY, otherwise rows per INSERT */
 	int			column_inserts;
 	int			if_exists;
 	int			no_comments;	/* Skip comments */
 	int			no_policies;	/* Skip row security policies */
 	int			no_publications;	/* Skip publication entries */
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 197c1295d93..d48c6d11e7c 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -979,32 +979,41 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
 					pg_log_info("processing data for table \"%s.%s\"",
 								te->namespace, te->tag);
 
 					/*
 					 * In parallel restore, if we created the table earlier in
 					 * this run (so that we know it is empty) and we are not
 					 * restoring a load-via-partition-root data item then we
 					 * wrap the COPY in a transaction and precede it with a
-					 * TRUNCATE.  If wal_level is set to minimal this prevents
+					 * TRUNCATE.
+					 *
+					 * Likewise if the table was pre-existing and the data is
+					 * being restored with --clean.
+					 *
+					 * If wal_level is set to minimal this prevents
 					 * WAL-logging the COPY.  This obtains a speedup similar
 					 * to that from using single_txn mode in non-parallel
 					 * restores.
 					 *
 					 * We mustn't do this for load-via-partition-root cases
 					 * because some data might get moved across partition
 					 * boundaries, risking deadlock and/or loss of previously
 					 * loaded data.  (We assume that all partitions of a
 					 * partitioned table will be treated the same way.)
 					 */
-					use_truncate = is_parallel && te->created &&
+					use_truncate = is_parallel &&
+						(te->created || (ropt->dumpData && ropt->clean)) &&
 						!is_load_via_partition_root(te);
 
 					if (use_truncate)
 					{
+						pg_log_debug("BEGIN transaction and TRUNCATE table \"%s.%s\"",
+									 te->namespace, te->tag);
+
 						/*
 						 * Parallel restore is always talking directly to a
 						 * server, so no need to see if we should issue BEGIN.
 						 */
 						StartTransaction(&AH->public);
 
 						/*
 						 * Issue TRUNCATE with ONLY so that child tables are
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index f2182e91825..d08d3979c41 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -207,17 +207,17 @@ main(int argc, char **argv)
 							cmdopts, NULL)) != -1)
 	{
 		switch (c)
 		{
 			case 'a':			/* Dump data only */
 				data_only = true;
 				break;
 			case 'c':			/* clean (i.e., drop) schema prior to create */
-				opts->dropSchema = 1;
+				opts->clean = 1;
 				break;
 			case 'C':
 				opts->createDB = 1;
 				break;
 			case 'd':
 				opts->cparams.dbname = pg_strdup(optarg);
 				break;
 			case 'e':
@@ -420,19 +420,16 @@ main(int argc, char **argv)
 	/* reject conflicting "with-" and "no-" options */
 	if (with_data && no_data)
 		pg_fatal("options --with-data and --no-data cannot be used together");
 	if (with_schema && no_schema)
 		pg_fatal("options --with-schema and --no-schema cannot be used together");
 	if (with_statistics && no_statistics)
 		pg_fatal("options --with-statistics and --no-statistics cannot be used together");
 
-	if (data_only && opts->dropSchema)
-		pg_fatal("options -c/--clean and -a/--data-only cannot be used together");
-
 	if (opts->single_txn && opts->txn_size > 0)
 		pg_fatal("options -1/--single-transaction and --transaction-size cannot be used together");
 
 	/*
 	 * -C is not compatible with -1, because we can't create a database inside
 	 * a transaction block.
 	 */
 	if (opts->createDB && opts->single_txn)
@@ -451,16 +448,19 @@ main(int argc, char **argv)
 	 */
 	opts->dumpData = ((opts->dumpData && !schema_only && !statistics_only) ||
 					  (data_only || with_data)) && !no_data;
 	opts->dumpSchema = ((opts->dumpSchema && !data_only && !statistics_only) ||
 						(schema_only || with_schema)) && !no_schema;
 	opts->dumpStatistics = ((opts->dumpStatistics && !schema_only && !data_only) ||
 							(statistics_only || with_statistics)) && !no_statistics;
 
+	if (opts->clean && opts->dumpSchema)
+		opts->dropSchema = 1;
+
 	opts->disable_triggers = disable_triggers;
 	opts->enable_row_security = enable_row_security;
 	opts->noDataForFailedTables = no_data_for_failed_tables;
 	opts->noTableAm = outputNoTableAm;
 	opts->noTablespace = outputNoTablespaces;
 	opts->use_setsessauth = use_setsessauth;
 	opts->no_comments = no_comments;
 	opts->no_policies = no_policies;
-- 
2.49.0

