From fbd412df74ff6ece7cc940668d4bdc25bd3ab77b Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Wed, 28 Nov 2018 09:51:49 +0900
Subject: [PATCH 2/2] Fix set of issues with pg_verify_checksums and base
 backups

Three issues are fixed in this patch:
- Base backups forgot to ignore files specific to EXEC_BACKEND, leading
to spurious warnings when checksums are enabled, per analysis from me.
- pg_verify_checksums forgot about files specific to EXEC_BACKEND,
leading to failures of the tool on any such build, particularly
Windows, error found by newly-introduced TAP tests, by various buildfarm
members.
- pg_verify_checksums forgot to count for temporary files and temporary
paths, which could be valid relation files, without checksums, per
report from Andres Freund.  Some tests are added to cover this case.

A new test case which emulates corruption for a file in a different
tablespace is added, coming from from Michael Banck, while I have coded
the main code.

Author: Michael Banck, Michael Paquier
Discussion: https://postgr.es/m/XXX
---
 src/backend/replication/basebackup.c          |   4 +
 .../pg_verify_checksums/pg_verify_checksums.c |  28 ++++-
 src/bin/pg_verify_checksums/t/002_actions.pl  | 100 +++++++++++++-----
 3 files changed, 101 insertions(+), 31 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index a7e3db2783..a7b7ab9d09 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -195,6 +195,10 @@ static const char *const noChecksumFiles[] = {
 	"pg_filenode.map",
 	"pg_internal.init",
 	"PG_VERSION",
+#ifdef EXEC_BACKEND
+	"config_exec_params",
+	"config_exec_params.new",
+#endif
 	NULL,
 };
 
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index 1bc020ab6c..5963053e00 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -20,6 +20,7 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/checksum_impl.h"
+#include "storage/fd.h"
 
 
 static int64 files = 0;
@@ -54,6 +55,10 @@ static const char *const skip[] = {
 	"pg_filenode.map",
 	"pg_internal.init",
 	"PG_VERSION",
+#ifdef EXEC_BACKEND
+	"config_exec_params",
+	"config_exec_params.new",
+#endif
 	NULL,
 };
 
@@ -62,13 +67,10 @@ skipfile(const char *fn)
 {
 	const char *const *f;
 
-	if (strcmp(fn, ".") == 0 ||
-		strcmp(fn, "..") == 0)
-		return true;
-
 	for (f = skip; *f; f++)
 		if (strcmp(*f, fn) == 0)
 			return true;
+
 	return false;
 }
 
@@ -146,9 +148,22 @@ scan_directory(const char *basedir, const char *subdir)
 		char		fn[MAXPGPATH];
 		struct stat st;
 
-		if (skipfile(de->d_name))
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
 			continue;
 
+		/* Skip temporary files */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILE_PREFIX,
+					strlen(PG_TEMP_FILE_PREFIX)) == 0)
+			continue;
+
+		/* Skip temporary folders */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILES_DIR,
+					strlen(PG_TEMP_FILES_DIR)) == 0)
+			return;
+
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
 		if (lstat(fn, &st) < 0)
 		{
@@ -163,6 +178,9 @@ scan_directory(const char *basedir, const char *subdir)
 					   *segmentpath;
 			BlockNumber segmentno = 0;
 
+			if (skipfile(de->d_name))
+				continue;
+
 			/*
 			 * Cut off at the segment boundary (".") to get the segment number
 			 * in order to mix it into the checksum. Then also cut off at the
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index c640cce260..bea8701fe2 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 36;
+use Test::More tests => 42;
 
 # Initialize node with checksums enabled.
 my $node = get_new_node('node_checksum');
@@ -27,6 +27,12 @@ append_to_file "$pgdata/global/99999_init.123", "";
 append_to_file "$pgdata/global/99999_fsm.123", "";
 append_to_file "$pgdata/global/99999_vm.123", "";
 
+# There are temporary files and folders with dummy contents, which
+# should be ignored by the scan.
+append_to_file "$pgdata/global/pgsql_tmp_123", "foo";
+mkdir "$pgdata/global/pgsql_tmp";
+append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo";
+
 # Checksums pass on a newly-created cluster
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
 		   "succeeds with offline cluster");
@@ -37,31 +43,7 @@ command_fails(['pg_verify_checksums',  '-D', $pgdata],
 			  "fails with online cluster");
 
 # Create table to corrupt and get its relfilenode
-$node->safe_psql('postgres',
-	"SELECT a INTO corrupt1 FROM generate_series(1,10000) AS a;
-	ALTER TABLE corrupt1 SET (autovacuum_enabled=false);");
-
-my $file_corrupted = $node->safe_psql('postgres',
-	"SELECT pg_relation_filepath('corrupt1')");
-my $relfilenode_corrupted =  $node->safe_psql('postgres',
-	"SELECT relfilenode FROM pg_class WHERE relname = 'corrupt1';");
-
-# Set page header and block size
-my $pageheader_size = 24;
-my $block_size = $node->safe_psql('postgres', 'SHOW block_size;');
-$node->stop;
-
-# Checksums are correct for single relfilenode as the table is not
-# corrupted yet.
-command_ok(['pg_verify_checksums',  '-D', $pgdata,
-	'-r', $relfilenode_corrupted],
-	"succeeds for single relfilenode with offline cluster");
-
-# Time to create some corruption
-open my $file, '+<', "$pgdata/$file_corrupted";
-seek($file, $pageheader_size, 0);
-syswrite($file, '\0\0\0\0\0\0\0\0\0');
-close $file;
+my $relfilenode_corrupted = create_corruption($node, 'corrupt1', 'pg_default');
 
 # Global checksum checks fail
 $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
@@ -78,6 +60,72 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r',
 						  [qr/checksum verification failed/],
 						  'fails for corrupted data on single relfilenode');
 
+# Drop corrupt table again and make sure there is no more corruption
+$node->start;
+$node->safe_psql('postgres', 'DROP TABLE corrupt1;');
+$node->stop;
+$node->command_ok(['pg_verify_checksums', '-D', $pgdata],
+        'succeeds again: '.$node->data_dir);
+
+# Create table to corrupt in a non-default tablespace and get its relfilenode
+my $tablespace_dir = $node->data_dir."/../ts_corrupt_dir";
+mkdir ($tablespace_dir);
+$node->start;
+$node->safe_psql('postgres', "CREATE TABLESPACE ts_corrupt LOCATION '".$tablespace_dir."';");
+$relfilenode_corrupted = create_corruption($node, 'corrupt2', 'ts_corrupt');
+$node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
+						  1,
+						  [qr/Bad checksums:.*1/],
+						  [qr/checksum verification failed/],
+						  'fails with corrupted data in non-default tablespace');
+
+# Drop corrupt table again and make sure there is no more corruption
+$node->start;
+$node->safe_psql('postgres', 'DROP TABLE corrupt2;');
+$node->stop;
+$node->command_ok(['pg_verify_checksums', '-D', $pgdata],
+        'succeeds again');
+
+# Utility routine to create a table with corrupted checksums.
+# It stops the node (if running), and starts it again.
+sub create_corruption
+{
+	my $node = shift;
+	my $table = shift;
+	my $tablespace = shift;
+
+	$node->safe_psql('postgres',
+		"SELECT a INTO ".$table." FROM generate_series(1,10000) AS a;
+		ALTER TABLE ".$table." SET (autovacuum_enabled=false);");
+
+	$node->safe_psql('postgres',
+		"ALTER TABLE ".$table." SET TABLESPACE ".$tablespace.";");
+
+	my $file_corrupted = $node->safe_psql('postgres',
+		"SELECT pg_relation_filepath('".$table."');");
+	my $relfilenode_corrupted =  $node->safe_psql('postgres',
+		"SELECT relfilenode FROM pg_class WHERE relname = '".$table."';");
+
+	# Set page header and block size
+	my $pageheader_size = 24;
+	my $block_size = $node->safe_psql('postgres', 'SHOW block_size;');
+	$node->stop;
+
+	# Checksums are correct for single relfilenode as the table is not
+	# corrupted yet.
+	command_ok(['pg_verify_checksums',  '-D', $pgdata,
+		'-r', $relfilenode_corrupted],
+		"succeeds for single relfilenode with offline cluster");
+
+	# Time to create some corruption
+	open my $file, '+<', "$pgdata/$file_corrupted";
+	seek($file, $pageheader_size, 0);
+	syswrite($file, '\0\0\0\0\0\0\0\0\0');
+	close $file;
+
+	return $relfilenode_corrupted;
+}
+
 # Utility routine to check that pg_verify_checksums is able to detect
 # correctly-named relation files filled with some corrupted data.
 sub fail_corrupt
-- 
2.19.1

