From 187d84a0ffc94fb9d5c9c0f6708227cc8f47fa3c Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Thu, 25 Oct 2018 16:48:47 +0900
Subject: [PATCH 1/2] Make pg_verify_checksums conscious of unknown files

---
 src/bin/pg_verify_checksums/pg_verify_checksums.c | 225 +++++++++++++++++-----
 1 file changed, 179 insertions(+), 46 deletions(-)

diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index f0e09bea20..4b527913c1 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -26,6 +26,9 @@
 static int64 files = 0;
 static int64 blocks = 0;
 static int64 badblocks = 0;
+static int64 skipped_known = 0;
+static int64 skipped_unknown = 0;
+static int64 skipped_dirs = 0;
 static ControlFileData *ControlFile;
 
 static char *only_relfilenode = NULL;
@@ -33,6 +36,46 @@ static bool verbose = false;
 
 static const char *progname;
 
+/* struct for checksum verification parameters */
+typedef struct
+{
+	union
+	{
+		struct
+		{
+			BlockNumber	segmentno;
+		} heap_param;
+	} params;
+} checksum_scan_context;
+
+/* enum for return value of find_file_type */
+typedef enum
+{
+	ENTRY_TO_IGNORE,
+	DIR_TO_SCAN,
+	HEAP_TO_SCAN,
+	FILE_TO_SKIP,
+	DIR_TO_SKIP,
+	FILE_UNKNOWN
+} checksum_file_types;
+
+/* black (explicit exclusion) list for checksum verification */
+static const char *const checksum_known_to_skip[] = {
+	"pg_control",
+ 	"pg_internal.init",
+	"pg_filenode.map",
+	"PG_VERSION",
+	"config_exec_params",
+	"config_exec_params.new",
+	"pgsql_tmp",					/* this is a directory */
+	NULL
+};
+
+static checksum_file_types find_file_type(const char *fn,
+										  const char *relfilenode,
+										  checksum_scan_context *ctx);
+
+
 static void
 usage(void)
 {
@@ -116,11 +159,12 @@ isRelFileName(const char *fn)
 }
 
 static void
-scan_file(const char *fn, BlockNumber segmentno)
+scan_heap_file(const char *fn, checksum_scan_context *ctx)
 {
 	PGAlignedBlock buf;
 	PageHeader	header = (PageHeader) buf.data;
 	int			f;
+	BlockNumber segmentno = ctx->params.heap_param.segmentno;
 	BlockNumber blockno;
 
 	f = open(fn, O_RDONLY | PG_BINARY, 0);
@@ -187,63 +231,147 @@ scan_directory(const char *basedir, const char *subdir)
 	while ((de = readdir(dir)) != NULL)
 	{
 		char		fn[MAXPGPATH];
-		struct stat st;
-
-		if (!isRelFileName(de->d_name))
-			continue;
+		checksum_scan_context ctx;
 
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
-		if (lstat(fn, &st) < 0)
+		switch (find_file_type(fn, only_relfilenode, &ctx))
 		{
-			fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
-					progname, fn, strerror(errno));
-			exit(1);
+		case ENTRY_TO_IGNORE:
+			continue;		/* ignore completely silently */
+		case FILE_TO_SKIP:
+			if (verbose)
+				fprintf(stderr, "skipped file: %s/%s/%s\n",
+						basedir, subdir, de->d_name);
+			skipped_known++;
+			continue;
+		case DIR_TO_SKIP:
+			if (verbose)
+				fprintf(stderr, "skipped directory: %s/%s/%s\n",
+						basedir, subdir, de->d_name);
+			skipped_dirs++;
+			continue;
+		case FILE_UNKNOWN:
+			if (verbose)
+				fprintf(stderr, "skipped unknown file: %s/%s/%s\n",
+						basedir, subdir, de->d_name);
+			skipped_unknown++;
+			continue;
+		case HEAP_TO_SCAN:
+			scan_heap_file(fn, &ctx);
+			break;
+		case DIR_TO_SCAN:
+			scan_directory(path, de->d_name);
+			break;
 		}
-		if (S_ISREG(st.st_mode))
+	}
+
+	closedir(dir);
+}
+
+/*
+ * find_file_type: identify what to do with a file
+ *
+ * fn is a file path, either absolute or relative to the current directory.
+ * relfilenode is a filter string. Only files matching the given node number
+ * or databaseid/filenodenum will have their checksums verified.
+ * ctx receives the parameters needed for the subsequent checksum scan.
+ */
+static checksum_file_types
+find_file_type(const char *fn, const char *relfilenode,
+			   checksum_scan_context *ctx)
+{
+	struct stat st;
+	char		fnonly[MAXPGPATH];
+	const char *fname;
+	char	   *forkpath;
+	char	   *segmentpath;
+	const char *const *p;
+	bool		is_subdir = false;
+
+	/* find the file name within the full path */
+	fname = strrchr(fn, '/');
+	if (fname)
+		fname++;
+	else
+		fname = fn;
+
+	if (strcmp(fname, ".") == 0 ||
+		strcmp(fname, "..") == 0)
+		return ENTRY_TO_IGNORE;
+
+	if (lstat(fn, &st) < 0)
+	{
+		fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
+				progname, fn, strerror(errno));
+		exit(1);
+	}
+
+#ifndef WIN32
+	if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
+#else
+	if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
+#endif
+		is_subdir = true;
+
+	/* excluded by blacklist */
+
+	for (p = checksum_known_to_skip ; *p ; p++)
+	{
+		if (strcmp(*p, fname) != 0)
+			continue;
+
+		if (!is_subdir)
+			return FILE_TO_SKIP;
+		else
+			return DIR_TO_SKIP;
+	}
+
+	if (is_subdir)
+		return DIR_TO_SCAN;
+
+	/* at this point, relation files are the only kind we recognize */
+	if (isRelFileName(fname))
+	{
+		/* copy the path so that we can scribble on it */
+		strlcpy(fnonly, fn, sizeof(fnonly));
+		ctx->params.heap_param.segmentno = 0;
+		segmentpath = strchr(fnonly, '.');
+
+		/* make sure that the dot is in the last component of the path */
+		if (segmentpath != NULL && strchr(segmentpath, '/') == NULL)
 		{
-			char		fnonly[MAXPGPATH];
-			char	   *forkpath,
-					   *segmentpath;
-			BlockNumber segmentno = 0;
+			*segmentpath++ = '\0';
+			ctx->params.heap_param.segmentno = atoi(segmentpath);
 
-			/*
-			 * Cut off at the segment boundary (".") to get the segment number
-			 * in order to mix it into the checksum. Then also cut off at the
-			 * fork boundary, to get the relfilenode the file belongs to for
-			 * filtering.
-			 */
-			strlcpy(fnonly, de->d_name, sizeof(fnonly));
-			segmentpath = strchr(fnonly, '.');
-			if (segmentpath != NULL)
-			{
-				*segmentpath++ = '\0';
-				segmentno = atoi(segmentpath);
-				if (segmentno == 0)
-				{
-					fprintf(stderr, _("%s: invalid segment number %d in file name \"%s\"\n"),
-							progname, segmentno, fn);
-					exit(1);
-				}
-			}
+			/* something's wrong, treat it as unknown file  */
+			if (ctx->params.heap_param.segmentno == 0)
+				return FILE_UNKNOWN;
+		}
+	
+		if (only_relfilenode)
+		{
+			char *p;
 
-			forkpath = strchr(fnonly, '_');
-			if (forkpath != NULL)
+			/* find file suffix if any */
+			forkpath = strrchr(fnonly, '_');
+
+			/* the underscore must be in the last segment in the path */
+			if (forkpath != NULL && strchr(forkpath, '/') == NULL)
 				*forkpath++ = '\0';
 
-			if (only_relfilenode && strcmp(only_relfilenode, fnonly) != 0)
+			/* make a tail match with only_relfilenode */
+			p = fnonly + strlen(fnonly) - strlen(relfilenode);
+			if (fnonly > p ||					 /* cannot match*/
+				(fnonly < p && *(p-1) != '/') || /* avoid false match */
+				strcmp(relfilenode, p) != 0)
 				/* Relfilenode not to be included */
-				continue;
-
-			scan_file(fn, segmentno);
+				return FILE_TO_SKIP;
 		}
-#ifndef WIN32
-		else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
-#else
-		else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
-#endif
-			scan_directory(path, de->d_name);
+
+		return HEAP_TO_SCAN;
 	}
-	closedir(dir);
+
+	return FILE_UNKNOWN;
 }
 
 int
@@ -359,6 +487,11 @@ main(int argc, char *argv[])
 	printf(_("Files scanned:  %s\n"), psprintf(INT64_FORMAT, files));
 	printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks));
 	printf(_("Bad checksums:  %s\n"), psprintf(INT64_FORMAT, badblocks));
+	printf(_("Files skipped: %s\n"),  psprintf(INT64_FORMAT, skipped_known));
+	printf(_("Unknown files skipped: %s\n"),
+		   psprintf(INT64_FORMAT, skipped_unknown));
+	printf(_("Skipped directories: %s\n"),
+		   psprintf(INT64_FORMAT, skipped_dirs));
 
 	if (badblocks > 0)
 		return 1;
-- 
2.16.3

