From 3ce3dfc2375759bd0083c8400d5b4ccaf9149aab Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat@enterprisedb.com>
Date: Thu, 27 Jun 2024 10:03:53 +0530
Subject: [PATCH 2/3] pg_dump/restore regression objects

002_pg_upgrade.pl tests pg_upgrade on the regression database left
behind by regression run. Modify it to test dump and restore of the
regression database as well.

Regression database created by regression run contains almost all the
database objects supported by PostgreSQL left behind in various states.
The test thus covers wider dump and restore scenarios.

Author: Ashutosh Bapat
Reviewed by: Michael Pacquire
Discussion: https://www.postgresql.org/message-id/CAExHW5uF5V=Cjecx3_Z=7xfh4rg2Wf61PT+hfquzjBqouRzQJQ@mail.gmail.com
---
 src/bin/pg_upgrade/t/002_pg_upgrade.pl      | 125 +++++++++++++++++---
 src/test/perl/PostgreSQL/Test/AdjustDump.pm | 108 +++++++++++++++++
 2 files changed, 216 insertions(+), 17 deletions(-)
 create mode 100644 src/test/perl/PostgreSQL/Test/AdjustDump.pm

diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 17af2ce61e..0e181b294d 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -13,6 +13,7 @@ use File::Path qw(rmtree);
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
 use PostgreSQL::Test::AdjustUpgrade;
+use PostgreSQL::Test::AdjustDump;
 use Test::More;
 
 # Can be changed to test the other modes.
@@ -36,9 +37,9 @@ sub generate_db
 		"created database with ASCII characters from $from_char to $to_char");
 }
 
-# Filter the contents of a dump before its use in a content comparison.
-# This returns the path to the filtered dump.
-sub filter_dump
+# Filter the contents of a dump before its use in a content comparison when
+# testing upgrade. This returns the path to the filtered dump.
+sub filter_dump_for_upgrade
 {
 	my ($is_old, $old_version, $dump_file) = @_;
 	my $dump_contents = slurp_file($dump_file);
@@ -61,6 +62,57 @@ sub filter_dump
 	return $dump_file_filtered;
 }
 
+# Test that the given two files (which usually contain output of pg_dump
+# command in plain format) match. Output the difference if any.
+sub compare_dumps
+{
+	my ($dump1, $dump2, $testname) = @_;
+
+	my $compare_res = compare($dump1, $dump2);
+	is($compare_res, 0, $testname);
+
+	# Provide more context if the dumps do not match.
+	if ($compare_res != 0)
+	{
+		my ($stdout, $stderr) =
+		  run_command([ 'diff', '-u', $dump1, $dump2 ]);
+		print "=== diff of $dump1 and $dump2\n";
+		print "=== stdout ===\n";
+		print $stdout;
+		print "=== stderr ===\n";
+		print $stderr;
+		print "=== EOF ===\n";
+	}
+}
+
+# Dump the database specified in the connection string for comparing original
+# and restored databases. The order of columns in COPY statements dumped from
+# the original database and the restored database introduces differences which
+# are difficult to adjust. Hence dump only schema for now.
+sub take_dump_for_comparison
+{
+	my ($connstr, $dump_file, $dbinstance) = @_;
+
+	command_ok(
+		[ 'pg_dump', '-s', '--no-sync', '-d', $connstr, '-f', $dump_file ],
+		'pg_dump on ' . $dbinstance . ' instance');
+}
+
+# Adjust the contents of a dump before its use in a content comparison when
+# testing dump and restore. This returns the path to the adjusted dump.
+sub adjust_dump_for_restore
+{
+	my ($dump_file, $original) = @_;
+	my $dump_adjusted = "${dump_file}_adjusted";
+
+	open(my $dh, '>', $dump_adjusted)
+	  || die "opening $dump_adjusted ";
+	print $dh adjust_regress_dumpfile(slurp_file($dump_file), $original);
+	close($dh);
+
+	return $dump_adjusted;
+}
+
 # The test of pg_upgrade requires two clusters, an old one and a new one
 # that gets upgraded.  Before running the upgrade, a logical dump of the
 # old cluster is taken, and a second logical dump of the new one is taken
@@ -258,6 +310,12 @@ else
 		}
 	}
 	is($rc, 0, 'regression tests pass');
+
+	# Test dump/restore of the objects left behind by regression. Ideally it
+	# should be done in a separate test, but doing it here saves us one full
+	# regression run. Do this while the old cluster remains usable before
+	# upgrading it.
+	test_regression_dump_restore($oldnode, %node_params);
 }
 
 # Initialize a new node for the upgrade.
@@ -502,24 +560,57 @@ push(@dump_command, '--extra-float-digits', '0')
 $newnode->command_ok(\@dump_command, 'dump after running pg_upgrade');
 
 # Filter the contents of the dumps.
-my $dump1_filtered = filter_dump(1, $oldnode->pg_version, $dump1_file);
-my $dump2_filtered = filter_dump(0, $oldnode->pg_version, $dump2_file);
+my $dump1_filtered =
+  filter_dump_for_upgrade(1, $oldnode->pg_version, $dump1_file);
+my $dump2_filtered =
+  filter_dump_for_upgrade(0, $oldnode->pg_version, $dump2_file);
 
 # Compare the two dumps, there should be no differences.
-my $compare_res = compare($dump1_filtered, $dump2_filtered);
-is($compare_res, 0, 'old and new dumps match after pg_upgrade');
+compare_dumps($dump1_filtered, $dump2_filtered,
+	'old and new dumps match after pg_upgrade');
 
-# Provide more context if the dumps do not match.
-if ($compare_res != 0)
+# Test dump and restore of objects left behind regression run. It is expected
+# that regression tests, which create `regression`` database, are run on
+# `src_node` and the node is left in running state.
+sub test_regression_dump_restore
 {
-	my ($stdout, $stderr) =
-	  run_command([ 'diff', '-u', $dump1_filtered, $dump2_filtered ]);
-	print "=== diff of $dump1_filtered and $dump2_filtered\n";
-	print "=== stdout ===\n";
-	print $stdout;
-	print "=== stderr ===\n";
-	print $stderr;
-	print "=== EOF ===\n";
+	my ($src_node, %node_params) = @_;
+	my $dst_node = PostgreSQL::Test::Cluster->new('dst_node');
+	my $dump3_file = "$tempdir/dump3.custom";
+	my $dump4_file = "$tempdir/dump4.sql";
+	my $dump5_file = "$tempdir/dump5.sql";
+
+
+	# Dump to be restored
+	command_ok(
+		[
+			'pg_dump', '-Fc', '--no-sync',
+			'-d', $src_node->connstr('regression'),
+			'-f', $dump3_file
+		],
+		'pg_dump on source instance');
+
+	$dst_node->init(%node_params);
+	$dst_node->start;
+	$dst_node->command_ok([ 'createdb', 'regression' ],
+		"created destination database");
+
+	# Restore into destination database
+	command_ok(
+		[ 'pg_restore', '-d', $dst_node->connstr('regression'), $dump3_file ],
+		'pg_restore on destination instance');
+
+	# Dump original and restored databases for comparison
+	take_dump_for_comparison($src_node->connstr('regression'),
+		$dump4_file, 'original');
+	take_dump_for_comparison($dst_node->connstr('regression'),
+		$dump5_file, 'restored');
+	my $dump4_adjusted = adjust_dump_for_restore($dump4_file, 1);
+	my $dump5_adjusted = adjust_dump_for_restore($dump5_file, 0);
+
+	# Compare adjusted dumps, there should be no differences.
+	compare_dumps($dump4_adjusted, $dump5_adjusted,
+		'dump outputs of original and restored regression database match');
 }
 
 done_testing();
diff --git a/src/test/perl/PostgreSQL/Test/AdjustDump.pm b/src/test/perl/PostgreSQL/Test/AdjustDump.pm
new file mode 100644
index 0000000000..df79cbe7c8
--- /dev/null
+++ b/src/test/perl/PostgreSQL/Test/AdjustDump.pm
@@ -0,0 +1,108 @@
+
+# Copyright (c) 2024-2025, PostgreSQL Global Development Group
+
+=pod
+
+=head1 NAME
+
+PostgreSQL::Test::AdjustDump - helper module for regression dump and restore tests
+
+=head1 SYNOPSIS
+
+  use PostgreSQL::Test::AdjustDump;
+
+  # Adjust contents of dump output file so that dump output from original
+  # regression database and that from the restored regression database match
+  $dump = adjust_regress_dumpfile($dump, $original);
+
+=head1 DESCRIPTION
+
+C<PostgreSQL::Test::AdjustDump> encapsulates various hacks needed to
+compare the results of dump and retore tests
+
+=cut
+
+package PostgreSQL::Test::AdjustDump;
+
+use strict;
+use warnings FATAL => 'all';
+
+use Exporter 'import';
+use PostgreSQL::Version;
+
+our @EXPORT = qw(
+  adjust_regress_dumpfile
+);
+
+=pod
+
+=head1 ROUTINES
+
+=over
+
+=item $dump = adjust_regress_dumpfile($dump, $original)
+
+If we take dump of the regression database left behind after running regression
+tests, restore the dump, and take dump of the restored regression database, the
+outputs of both the dumps differ. Some regression tests purposefully create
+some child tables in such a way that their column orders differ from column
+orders of their respective parents. When these child tables are restore using
+C<CREATE TABLE ... INHERITS> command, they have their column orders same as
+that of their respective parents. Thus the column orders of child tables in the
+original database and those in the restored database differ, causing difference
+in the dump outputs. Adjust these DDL statements in the dump file from original
+database to match those from the restored database so that both the dump files
+match.
+
+Additionally adjust blank and new lines to avoid noise.
+
+Arguments:
+
+=over
+
+=item C<dump>: Contents of dump file
+
+=item C<original>: 1 indicates that the given dump file is from the original
+database, else 0
+
+=back
+
+Returns the adjusted dump text.
+
+=cut
+
+sub adjust_regress_dumpfile
+{
+	my ($dump, $original) = @_;
+
+	# use Unix newlines
+	$dump =~ s/\r\n/\n/g;
+	# Suppress blank lines, as some places in pg_dump emit more or fewer.
+	$dump =~ s/\n\n+/\n/g;
+
+	# Adjust the CREATE TABLE ... INHERITS statements.
+	if ($original)
+	{
+		$dump =~ s/(^CREATE\sTABLE\spublic\.gtestxx_4\s\()
+				   (\n\s+b\sinteger),
+				   (\n\s+a\sinteger)/$1$3,$2/mgx;
+		$dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c1\s\()
+				   (\n\s+int_four\sbigint),
+				   (\n\s+int_eight\sbigint),
+				   (\n\s+int_two\ssmallint)/$1$4,$2,$3/mgx;
+		$dump =~ s/(CREATE\sTABLE\spublic\.test_type_diff2_c2\s\()
+				   (\n\s+int_eight\sbigint),
+				   (\n\s+int_two\ssmallint),
+				   (\n\s+int_four\sbigint)/$1$3,$4,$2/mgx;
+	}
+
+	return $dump;
+}
+
+=pod
+
+=back
+
+=cut
+
+1;
-- 
2.34.1

