From 9ac82aebd2de8d3821633fcbc0838bd872f2e423 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 5 Feb 2018 15:48:35 +0900
Subject: [PATCH 5/5] Use filtering list of base backups in pg_rewind to
 exclude more content

After being rewound, a standby to be recycled needs to perform recovery
from the last checkpoint where WAL forked after a promotion, which leads
it to automatically remove some files which may have been copied from
the source cluster. Make use of the same filtering list as base backups
to find out what this data is and then filter it out. This reduces the
amount of data transferred during a rewind without changing the
usefulness of the operation. This takes advantage of the
newly-introduced basebackup_paths.h to give pg_rewind access to this
data.

Documentation is updated to take into account what is filtered out.
---
 doc/src/sgml/ref/pg_rewind.sgml | 12 ++++++++-
 src/bin/pg_rewind/filemap.c     | 60 +++++++++++++++++++++++++++++++++++------
 2 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 8e49249826..ccd0ae505e 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -231,7 +231,17 @@ PostgreSQL documentation
      <para>
       Copy all other files such as <filename>pg_xact</filename> and
       configuration files from the source cluster to the target cluster
-      (everything except the relation files).
+      (everything except the relation files). Similarly to base backups,
+      the contents of the directories <filename>pg_dynshmem/</filename>,
+      <filename>pg_notify/</filename>, <filename>pg_replslot/</filename>,
+      <filename>pg_serial/</filename>, <filename>pg_snapshots/</filename>,
+      <filename>pg_stat_tmp/</filename>, and
+      <filename>pg_subtrans/</filename> are omitted from the data copied
+      from the source cluster. Any file or directory beginning with
+      <filename>pgsql_tmp</filename> is omitted, as are
+      <filename>pg_internal.init</filename>,
+      <filename>postmaster.opts</filename> and
+      <filename>postmaster.pid</filename>.
      </para>
     </step>
     <step>
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 1c462c1041..0a6426e6c2 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -21,6 +21,7 @@
 #include "common/string.h"
 #include "catalog/pg_tablespace.h"
 #include "pg_paths.h"
+#include "replication/basebackup_paths.h"
 #include "storage/fd.h"
 
 filemap_t  *filemap = NULL;
@@ -68,15 +69,37 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
 	file_action_t action = FILE_ACTION_NONE;
 	size_t		oldsize = 0;
 	file_entry_t *entry;
+	int			excludeIdx;
 
 	Assert(map->array == NULL);
 
 	/*
-	 * Completely ignore some special files in source and destination.
+	 * Completely ignore some special files in source and destination.  This
+	 * deliberately skips any file matching an entry in the list of files to
+	 * filter out.
 	 */
-	if (strcmp(path, POSTMASTER_PID_FILE) == 0 ||
-		strcmp(path, POSTMASTER_OPTS_FILE) == 0)
-		return;
+	for (excludeIdx = 0; backupExcludeFiles[excludeIdx] != NULL; excludeIdx++)
+	{
+		if (strstr(path, backupExcludeFiles[excludeIdx]) != NULL)
+		{
+			pg_log(PG_DEBUG, "file \"%s\" excluded from source file list\n",
+				   path);
+			return;
+		}
+	}
+
+	/*
+	 * ... And ignore some directories.
+	 */
+	for (excludeIdx = 0; backupExcludeDirs[excludeIdx] != NULL; excludeIdx++)
+	{
+		if (strcmp(path, backupExcludeDirs[excludeIdx]) == 0)
+		{
+			pg_log(PG_DEBUG, "directory \"%s\" excluded from source file list\n",
+				   path);
+			return;
+		}
+	}
 
 	/*
 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
@@ -259,6 +282,7 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
 	file_entry_t *key_ptr;
 	filemap_t  *map = filemap;
 	file_entry_t *entry;
+	int			excludeIdx;
 
 	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
 	if (lstat(localpath, &statbuf) < 0)
@@ -287,11 +311,31 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
 	}
 
 	/*
-	 * Completely ignore some special files
+	 * Completely ignore some special files.  This deliberately skips any file
+	 * matching an entry in the list of files to filter out.
 	 */
-	if (strcmp(path, POSTMASTER_PID_FILE) == 0 ||
-		strcmp(path, POSTMASTER_OPTS_FILE) == 0)
-		return;
+	for (excludeIdx = 0; backupExcludeFiles[excludeIdx] != NULL; excludeIdx++)
+	{
+		if (strstr(path, backupExcludeFiles[excludeIdx]) != NULL)
+		{
+			pg_log(PG_DEBUG, "file \"%s\" excluded from target file list\n",
+				   path);
+			return;
+		}
+	}
+
+	/*
+	 * ... And ignore some directories.
+	 */
+	for (excludeIdx = 0; backupExcludeDirs[excludeIdx] != NULL; excludeIdx++)
+	{
+		if (strcmp(path, backupExcludeDirs[excludeIdx]) == 0)
+		{
+			pg_log(PG_DEBUG, "directory \"%s\" excluded from target file list\n",
+				   path);
+			return;
+		}
+	}
 
 	/*
 	 * Like in process_source_file, pretend that xlog is always a  directory.
-- 
2.16.1

