From ecb401f0215e9e8c2b65162a5a4941c35038ba68 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 19 Aug 2020 15:34:37 +0300
Subject: [PATCH v2 2/5] Refactor pg_rewind for more clear decision making.

Deciding what to do with each file is now a separate step after all the
necessary information has been gathered. It is more clear that way.
Previously, the decision-making was divided between process_source_file()
and process_target_file(), and it was a bit hard to piece together what the
overall rules were.
---
 src/bin/pg_rewind/copy_fetch.c  |  14 +-
 src/bin/pg_rewind/file_ops.c    |  16 +-
 src/bin/pg_rewind/filemap.c     | 569 ++++++++++++++++----------------
 src/bin/pg_rewind/filemap.h     |  67 ++--
 src/bin/pg_rewind/libpq_fetch.c |  12 +-
 src/bin/pg_rewind/parsexlog.c   |   2 +-
 src/bin/pg_rewind/pg_rewind.c   |   8 +-
 7 files changed, 371 insertions(+), 317 deletions(-)

diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c
index 1edab5f1867..18fad32600e 100644
--- a/src/bin/pg_rewind/copy_fetch.c
+++ b/src/bin/pg_rewind/copy_fetch.c
@@ -210,7 +210,7 @@ copy_executeFileMap(filemap_t *map)
 	for (i = 0; i < map->narray; i++)
 	{
 		entry = map->array[i];
-		execute_pagemap(&entry->pagemap, entry->path);
+		execute_pagemap(&entry->target_modified_pages, entry->path);
 
 		switch (entry->action)
 		{
@@ -219,16 +219,16 @@ copy_executeFileMap(filemap_t *map)
 				break;
 
 			case FILE_ACTION_COPY:
-				rewind_copy_file_range(entry->path, 0, entry->newsize, true);
+				rewind_copy_file_range(entry->path, 0, entry->source_size, true);
 				break;
 
 			case FILE_ACTION_TRUNCATE:
-				truncate_target_file(entry->path, entry->newsize);
+				truncate_target_file(entry->path, entry->source_size);
 				break;
 
 			case FILE_ACTION_COPY_TAIL:
-				rewind_copy_file_range(entry->path, entry->oldsize,
-									   entry->newsize, false);
+				rewind_copy_file_range(entry->path, entry->target_size,
+									   entry->source_size, false);
 				break;
 
 			case FILE_ACTION_CREATE:
@@ -238,6 +238,10 @@ copy_executeFileMap(filemap_t *map)
 			case FILE_ACTION_REMOVE:
 				remove_target(entry);
 				break;
+
+			case FILE_ACTION_UNDECIDED:
+				pg_fatal("no action decided for \"%s\"", entry->path);
+				break;
 		}
 	}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 55439db20ba..ec37d0b2e0d 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -126,8 +126,9 @@ void
 remove_target(file_entry_t *entry)
 {
 	Assert(entry->action == FILE_ACTION_REMOVE);
+	Assert(entry->target_exists);
 
-	switch (entry->type)
+	switch (entry->target_type)
 	{
 		case FILE_TYPE_DIRECTORY:
 			remove_target_dir(entry->path);
@@ -140,6 +141,10 @@ remove_target(file_entry_t *entry)
 		case FILE_TYPE_SYMLINK:
 			remove_target_symlink(entry->path);
 			break;
+
+		case FILE_TYPE_UNDEFINED:
+			pg_fatal("undefined file type for \"%s\"", entry->path);
+			break;
 	}
 }
 
@@ -147,21 +152,26 @@ void
 create_target(file_entry_t *entry)
 {
 	Assert(entry->action == FILE_ACTION_CREATE);
+	Assert(!entry->target_exists);
 
-	switch (entry->type)
+	switch (entry->source_type)
 	{
 		case FILE_TYPE_DIRECTORY:
 			create_target_dir(entry->path);
 			break;
 
 		case FILE_TYPE_SYMLINK:
-			create_target_symlink(entry->path, entry->link_target);
+			create_target_symlink(entry->path, entry->source_link_target);
 			break;
 
 		case FILE_TYPE_REGULAR:
 			/* can't happen. Regular files are created with open_target_file. */
 			pg_fatal("invalid action (CREATE) for regular file");
 			break;
+
+		case FILE_TYPE_UNDEFINED:
+			pg_fatal("undefined file type for \"%s\"", entry->path);
+			break;
 	}
 }
 
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 1879257b66a..7971daeda5e 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -26,6 +26,8 @@ static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
 						 BlockNumber segno);
 static int	path_cmp(const void *a, const void *b);
+
+static file_entry_t *get_filemap_entry(const char *path, bool create);
 static int	final_filemap_cmp(const void *a, const void *b);
 static void filemap_list_to_array(filemap_t *map);
 static bool check_file_excluded(const char *path, bool is_source);
@@ -146,6 +148,65 @@ filemap_create(void)
 	filemap = map;
 }
 
+/* Look up 'path' in the sorted array; on a miss, create an entry or return NULL */
+static file_entry_t *
+get_filemap_entry(const char *path, bool create)
+{
+	filemap_t  *map = filemap;
+	file_entry_t *entry;
+	file_entry_t **e;
+	file_entry_t key;
+	file_entry_t *key_ptr;
+
+	if (map->array)
+	{
+		key.path = (char *) path;
+		key_ptr = &key;
+		e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
+					path_cmp);
+	}
+	else
+		e = NULL;
+
+	if (e)
+		entry = *e;
+	else if (!create)
+		entry = NULL;
+	else
+	{
+		/* Not found: append a new entry, marked as existing on neither system */
+		entry = pg_malloc(sizeof(file_entry_t));
+		entry->path = pg_strdup(path);
+		entry->isrelfile = isRelDataFile(path);
+		entry->action = FILE_ACTION_UNDECIDED;
+
+		entry->target_exists = false;
+		entry->target_type = FILE_TYPE_UNDEFINED;
+		entry->target_size = 0;
+		entry->target_link_target = NULL;
+		entry->target_modified_pages.bitmap = NULL;
+		entry->target_modified_pages.bitmapsize = 0;
+
+		entry->source_exists = false;
+		entry->source_type = FILE_TYPE_UNDEFINED;
+		entry->source_size = 0;
+		entry->source_link_target = NULL;
+
+		entry->next = NULL;
+
+		if (map->last)
+		{
+			map->last->next = entry;
+			map->last = entry;
+		}
+		else
+			map->first = map->last = entry;
+		map->nlist++;
+	}
+
+	return entry;
+}
+
 /*
  * Callback for processing source file list.
  *
@@ -154,25 +215,12 @@ filemap_create(void)
  * exists in the target and whether the size matches.
  */
 void
-process_source_file(const char *path, file_type_t type, size_t newsize,
+process_source_file(const char *path, file_type_t type, size_t size,
 					const char *link_target)
 {
-	bool		exists;
-	char		localpath[MAXPGPATH];
-	struct stat statbuf;
-	filemap_t  *map = filemap;
-	file_action_t action = FILE_ACTION_NONE;
-	size_t		oldsize = 0;
 	file_entry_t *entry;
 
-	Assert(map->array == NULL);
-
-	/*
-	 * Skip any files matching the exclusion filters. This has the effect to
-	 * remove all those files on the target.
-	 */
-	if (check_file_excluded(path, true))
-		return;
+	Assert(filemap->array == NULL);
 
 	/*
 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
@@ -182,16 +230,6 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
 		type = FILE_TYPE_DIRECTORY;
 
-	/*
-	 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
-	 * This has the effect that all temporary files in the destination will be
-	 * removed.
-	 */
-	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
-		return;
-	if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
-		return;
-
 	/*
 	 * sanity check: a filename that looks like a data file better be a
 	 * regular file
@@ -199,142 +237,12 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
 	if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
 		pg_fatal("data file \"%s\" in source is not a regular file", path);
 
-	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
-
-	/* Does the corresponding file exist in the target data dir? */
-	if (lstat(localpath, &statbuf) < 0)
-	{
-		if (errno != ENOENT)
-			pg_fatal("could not stat file \"%s\": %m",
-					 localpath);
-
-		exists = false;
-	}
-	else
-		exists = true;
-
-	switch (type)
-	{
-		case FILE_TYPE_DIRECTORY:
-			if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
-			{
-				/* it's a directory in source, but not in target. Strange.. */
-				pg_fatal("\"%s\" is not a directory", localpath);
-			}
-
-			if (!exists)
-				action = FILE_ACTION_CREATE;
-			else
-				action = FILE_ACTION_NONE;
-			oldsize = 0;
-			break;
-
-		case FILE_TYPE_SYMLINK:
-			if (exists &&
-#ifndef WIN32
-				!S_ISLNK(statbuf.st_mode)
-#else
-				!pgwin32_is_junction(localpath)
-#endif
-				)
-			{
-				/*
-				 * It's a symbolic link in source, but not in target.
-				 * Strange..
-				 */
-				pg_fatal("\"%s\" is not a symbolic link", localpath);
-			}
-
-			if (!exists)
-				action = FILE_ACTION_CREATE;
-			else
-				action = FILE_ACTION_NONE;
-			oldsize = 0;
-			break;
-
-		case FILE_TYPE_REGULAR:
-			if (exists && !S_ISREG(statbuf.st_mode))
-				pg_fatal("\"%s\" is not a regular file", localpath);
-
-			if (!exists || !isRelDataFile(path))
-			{
-				/*
-				 * File exists in source, but not in target. Or it's a
-				 * non-data file that we have no special processing for. Copy
-				 * it in toto.
-				 *
-				 * An exception: PG_VERSIONs should be identical, but avoid
-				 * overwriting it for paranoia.
-				 */
-				if (pg_str_endswith(path, "PG_VERSION"))
-				{
-					action = FILE_ACTION_NONE;
-					oldsize = statbuf.st_size;
-				}
-				else
-				{
-					action = FILE_ACTION_COPY;
-					oldsize = 0;
-				}
-			}
-			else
-			{
-				/*
-				 * It's a data file that exists in both.
-				 *
-				 * If it's larger in target, we can truncate it. There will
-				 * also be a WAL record of the truncation in the source
-				 * system, so WAL replay would eventually truncate the target
-				 * too, but we might as well do it now.
-				 *
-				 * If it's smaller in the target, it means that it has been
-				 * truncated in the target, or enlarged in the source, or
-				 * both. If it was truncated in the target, we need to copy
-				 * the missing tail from the source system. If it was enlarged
-				 * in the source system, there will be WAL records in the
-				 * source system for the new blocks, so we wouldn't need to
-				 * copy them here. But we don't know which scenario we're
-				 * dealing with, and there's no harm in copying the missing
-				 * blocks now, so do it now.
-				 *
-				 * If it's the same size, do nothing here. Any blocks modified
-				 * in the target will be copied based on parsing the target
-				 * system's WAL, and any blocks modified in the source will be
-				 * updated after rewinding, when the source system's WAL is
-				 * replayed.
-				 */
-				oldsize = statbuf.st_size;
-				if (oldsize < newsize)
-					action = FILE_ACTION_COPY_TAIL;
-				else if (oldsize > newsize)
-					action = FILE_ACTION_TRUNCATE;
-				else
-					action = FILE_ACTION_NONE;
-			}
-			break;
-	}
-
-	/* Create a new entry for this file */
-	entry = pg_malloc(sizeof(file_entry_t));
-	entry->path = pg_strdup(path);
-	entry->type = type;
-	entry->action = action;
-	entry->oldsize = oldsize;
-	entry->newsize = newsize;
-	entry->link_target = link_target ? pg_strdup(link_target) : NULL;
-	entry->next = NULL;
-	entry->pagemap.bitmap = NULL;
-	entry->pagemap.bitmapsize = 0;
-	entry->isrelfile = isRelDataFile(path);
-
-	if (map->last)
-	{
-		map->last->next = entry;
-		map->last = entry;
-	}
-	else
-		map->first = map->last = entry;
-	map->nlist++;
+	/* Remember this source file */
+	entry = get_filemap_entry(path, true);
+	entry->source_exists = true;
+	entry->source_type = type;
+	entry->source_size = size;
+	entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
 }
 
 /*
@@ -345,14 +253,9 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
  * deletion.
  */
 void
-process_target_file(const char *path, file_type_t type, size_t oldsize,
+process_target_file(const char *path, file_type_t type, size_t size,
 					const char *link_target)
 {
-	bool		exists;
-	char		localpath[MAXPGPATH];
-	struct stat statbuf;
-	file_entry_t key;
-	file_entry_t *key_ptr;
 	filemap_t  *map = filemap;
 	file_entry_t *entry;
 
@@ -361,17 +264,6 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
 	 * from the target data folder all paths which have been filtered out from
 	 * the source data folder when processing the source files.
 	 */
-
-	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
-	if (lstat(localpath, &statbuf) < 0)
-	{
-		if (errno != ENOENT)
-			pg_fatal("could not stat file \"%s\": %m",
-					 localpath);
-
-		exists = false;
-	}
-
 	if (map->array == NULL)
 	{
 		/* on first call, initialize lookup array */
@@ -389,120 +281,76 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
 	}
 
 	/*
-	 * Like in process_source_file, pretend that xlog is always a  directory.
+	 * Like in process_source_file, pretend that pg_wal is always a directory.
 	 */
 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
 		type = FILE_TYPE_DIRECTORY;
 
-	key.path = (char *) path;
-	key_ptr = &key;
-	exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
-					  path_cmp) != NULL);
-
-	/* Remove any file or folder that doesn't exist in the source system. */
-	if (!exists)
-	{
-		entry = pg_malloc(sizeof(file_entry_t));
-		entry->path = pg_strdup(path);
-		entry->type = type;
-		entry->action = FILE_ACTION_REMOVE;
-		entry->oldsize = oldsize;
-		entry->newsize = 0;
-		entry->link_target = link_target ? pg_strdup(link_target) : NULL;
-		entry->next = NULL;
-		entry->pagemap.bitmap = NULL;
-		entry->pagemap.bitmapsize = 0;
-		entry->isrelfile = isRelDataFile(path);
-
-		if (map->last == NULL)
-			map->first = entry;
-		else
-			map->last->next = entry;
-		map->last = entry;
-		map->nlist++;
-	}
-	else
-	{
-		/*
-		 * We already handled all files that exist in the source system in
-		 * process_source_file().
-		 */
-	}
+	/* Remember this target file */
+	entry = get_filemap_entry(path, true);
+	entry->target_exists = true;
+	entry->target_type = type;
+	entry->target_size = size;
+	entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
 }
 
 /*
  * This callback gets called while we read the WAL in the target, for every
- * block that have changed in the target system. It makes note of all the
+ * block that has changed in the target system.  It makes note of all the
  * changed blocks in the pagemap of the file.
+ *
+ * NOTE: All the files on both systems must have already been added to the
+ * file map!
  */
 void
-process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
+process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
+								BlockNumber blkno)
 {
 	char	   *path;
-	file_entry_t key;
-	file_entry_t *key_ptr;
 	file_entry_t *entry;
 	BlockNumber blkno_inseg;
 	int			segno;
-	filemap_t  *map = filemap;
-	file_entry_t **e;
 
-	Assert(map->array);
+	Assert(filemap->array);
 
 	segno = blkno / RELSEG_SIZE;
 	blkno_inseg = blkno % RELSEG_SIZE;
 
 	path = datasegpath(rnode, forknum, segno);
-
-	key.path = (char *) path;
-	key_ptr = &key;
-
-	e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
-				path_cmp);
-	if (e)
-		entry = *e;
-	else
-		entry = NULL;
+	entry = get_filemap_entry(path, false);
 	pfree(path);
 
 	if (entry)
 	{
-		Assert(entry->isrelfile);
-
-		switch (entry->action)
-		{
-			case FILE_ACTION_NONE:
-			case FILE_ACTION_TRUNCATE:
-				/* skip if we're truncating away the modified block anyway */
-				if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
-					datapagemap_add(&entry->pagemap, blkno_inseg);
-				break;
+		int64		end_offset;
 
-			case FILE_ACTION_COPY_TAIL:
-
-				/*
-				 * skip the modified block if it is part of the "tail" that
-				 * we're copying anyway.
-				 */
-				if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
-					datapagemap_add(&entry->pagemap, blkno_inseg);
-				break;
+		Assert(entry->isrelfile);
 
-			case FILE_ACTION_COPY:
-			case FILE_ACTION_REMOVE:
-				break;
+		if (entry->target_exists && entry->target_type != FILE_TYPE_REGULAR)
+			pg_fatal("unexpected page modification for directory or symbolic link \"%s\"",
+					 entry->path);
 
-			case FILE_ACTION_CREATE:
-				pg_fatal("unexpected page modification for directory or symbolic link \"%s\"", entry->path);
-		}
+		/*
+		 * If the block is beyond the EOF in the source system, no need to
+		 * remember it now, because we're going to truncate it away from the
+		 * target anyway. Also no need to remember the block if it's beyond
+		 * the current EOF in the target system (target_size is 0 if the file is
+		 * missing there); it gets copied with the "tail" or the whole file.
+		 */
+		end_offset = (blkno_inseg + 1) * BLCKSZ;
+		if (end_offset <= entry->source_size &&
+			end_offset <= entry->target_size)
+			datapagemap_add(&entry->target_modified_pages, blkno_inseg);
 	}
 	else
 	{
 		/*
 		 * If we don't have any record of this file in the file map, it means
-		 * that it's a relation that doesn't exist in the source system, and
-		 * it was subsequently removed in the target system, too. We can
-		 * safely ignore it.
+		 * that it's a relation that doesn't exist in the source system.  It
+		 * could exist in the target system; we haven't moved the target-only
+		 * entries from the linked list to the array yet!  But in any case, if
+		 * it doesn't exist in the source it will be removed from the target
+		 * too, and we can safely ignore it.
 		 */
 	}
 }
@@ -593,16 +441,6 @@ filemap_list_to_array(filemap_t *map)
 	map->first = map->last = NULL;
 }
 
-void
-filemap_finalize(void)
-{
-	filemap_t  *map = filemap;
-
-	filemap_list_to_array(map);
-	qsort(map->array, map->narray, sizeof(file_entry_t *),
-		  final_filemap_cmp);
-}
-
 static const char *
 action_to_str(file_action_t action)
 {
@@ -643,26 +481,26 @@ calculate_totals(void)
 	{
 		entry = map->array[i];
 
-		if (entry->type != FILE_TYPE_REGULAR)
+		if (entry->source_type != FILE_TYPE_REGULAR)
 			continue;
 
-		map->total_size += entry->newsize;
+		map->total_size += entry->source_size;
 
 		if (entry->action == FILE_ACTION_COPY)
 		{
-			map->fetch_size += entry->newsize;
+			map->fetch_size += entry->source_size;
 			continue;
 		}
 
 		if (entry->action == FILE_ACTION_COPY_TAIL)
-			map->fetch_size += (entry->newsize - entry->oldsize);
+			map->fetch_size += (entry->source_size - entry->target_size);
 
-		if (entry->pagemap.bitmapsize > 0)
+		if (entry->target_modified_pages.bitmapsize > 0)
 		{
 			datapagemap_iterator_t *iter;
 			BlockNumber blk;
 
-			iter = datapagemap_iterate(&entry->pagemap);
+			iter = datapagemap_iterate(&entry->target_modified_pages);
 			while (datapagemap_next(iter, &blk))
 				map->fetch_size += BLCKSZ;
 
@@ -682,13 +520,13 @@ print_filemap(void)
 	{
 		entry = map->array[i];
 		if (entry->action != FILE_ACTION_NONE ||
-			entry->pagemap.bitmapsize > 0)
+			entry->target_modified_pages.bitmapsize > 0)
 		{
 			pg_log_debug("%s (%s)", entry->path,
 						 action_to_str(entry->action));
 
-			if (entry->pagemap.bitmapsize > 0)
-				datapagemap_print(&entry->pagemap);
+			if (entry->target_modified_pages.bitmapsize > 0)
+				datapagemap_print(&entry->target_modified_pages);
 		}
 	}
 	fflush(stdout);
@@ -837,3 +675,180 @@ final_filemap_cmp(const void *a, const void *b)
 	else
 		return strcmp(fa->path, fb->path);
 }
+
+/*
+ * Decide what action to perform on a file, given its state on both systems.
+ */
+static file_action_t
+decide_file_action(file_entry_t *entry)
+{
+	const char *path = entry->path;
+
+	/*
+	 * Don't touch the control file. It is handled specially, after copying
+	 * all the other files.
+	 */
+	if (strcmp(path, "global/pg_control") == 0)
+		return FILE_ACTION_NONE;
+
+	/*
+	 * Remove all files matching the exclusion filters in the target.
+	 */
+	if (check_file_excluded(path, true))
+	{
+		if (entry->target_exists)
+			return FILE_ACTION_REMOVE;
+		else
+			return FILE_ACTION_NONE;
+	}
+
+	/*
+	 * Also remove all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
+	 * in the target.
+	 */
+	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
+		strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
+	{
+		if (entry->target_exists)
+			return FILE_ACTION_REMOVE;
+		else
+			return FILE_ACTION_NONE;
+	}
+
+	/*
+	 * Handle cases where the file is missing from one of the systems.
+	 */
+	if (!entry->target_exists && entry->source_exists)
+	{
+		/*
+		 * File exists in source, but not in target. Create it (directory or
+		 * symlink) or copy it in toto (regular file; WAL replay after rewinding
+		 * should re-create a relation data file anyway, but copying is harmless).
+		 */
+		switch (entry->source_type)
+		{
+			case FILE_TYPE_DIRECTORY:
+			case FILE_TYPE_SYMLINK:
+				return FILE_ACTION_CREATE;
+			case FILE_TYPE_REGULAR:
+				return FILE_ACTION_COPY;
+			case FILE_TYPE_UNDEFINED:
+				pg_fatal("unknown file type for \"%s\"", entry->path);
+				break;
+		}
+	}
+	else if (entry->target_exists && !entry->source_exists)
+	{
+		/* File exists in target, but not source. Remove it. */
+		return FILE_ACTION_REMOVE;
+	}
+	else if (!entry->target_exists && !entry->source_exists)
+	{
+		/*
+		 * Doesn't exist in either server. Why does it have an entry in the
+		 * first place??
+		 */
+		return FILE_ACTION_NONE;
+	}
+
+	/*
+	 * Otherwise, the file exists on both systems
+	 */
+	Assert(entry->target_exists && entry->source_exists);
+
+	if (entry->source_type != entry->target_type)
+	{
+		/* But it's a different kind of object. Strange.. */
+		pg_fatal("file \"%s\" is of different type in source and target", entry->path);
+	}
+
+	/*
+	 * PG_VERSION files should be identical on both systems, but avoid
+	 * overwriting them for paranoia.
+	 */
+	if (pg_str_endswith(entry->path, "PG_VERSION"))
+		return FILE_ACTION_NONE;
+
+	switch (entry->source_type)
+	{
+		case FILE_TYPE_DIRECTORY:
+			return FILE_ACTION_NONE;
+
+		case FILE_TYPE_SYMLINK:
+			/* FIXME: Check if it points to the same target? */
+			return FILE_ACTION_NONE;
+
+		case FILE_TYPE_REGULAR:
+			if (!entry->isrelfile)
+			{
+				/*
+				 * It's a non-data file that we have no special processing
+				 * for. Copy it in toto.
+				 */
+				return FILE_ACTION_COPY;
+			}
+			else
+			{
+				/*
+				 * It's a data file that exists in both systems.
+				 *
+				 * If it's larger in target, we can truncate it. There will
+				 * also be a WAL record of the truncation in the source
+				 * system, so WAL replay would eventually truncate the target
+				 * too, but we might as well do it now.
+				 *
+				 * If it's smaller in the target, it means that it has been
+				 * truncated in the target, or enlarged in the source, or
+				 * both. If it was truncated in the target, we need to copy
+				 * the missing tail from the source system. If it was enlarged
+				 * in the source system, there will be WAL records in the
+				 * source system for the new blocks, so we wouldn't need to
+				 * copy them here. But we don't know which scenario we're
+				 * dealing with, and there's no harm in copying the missing
+				 * blocks now, so do it now.
+				 *
+				 * If it's the same size, do nothing here. Any blocks modified
+				 * in the target will be copied based on parsing the target
+				 * system's WAL, and any blocks modified in the source will be
+				 * updated after rewinding, when the source system's WAL is
+				 * replayed.
+				 */
+				if (entry->target_size < entry->source_size)
+					return FILE_ACTION_COPY_TAIL;
+				else if (entry->target_size > entry->source_size)
+					return FILE_ACTION_TRUNCATE;
+				else
+					return FILE_ACTION_NONE;
+			}
+			break;
+
+		case FILE_TYPE_UNDEFINED:
+			pg_fatal("unknown file type for \"%s\"", path);
+			break;
+	}
+
+	/* unreachable: every case above either returns or reports a fatal error */
+	pg_fatal("could not decide what to do with file \"%s\"", path);
+}
+
+/*
+ * Decide what to do with each file, then sort entries into execution order.
+ */
+void
+filemap_finalize(void)
+{
+	int			i;
+
+	filemap_list_to_array(filemap);	/* move remaining list entries into the array */
+
+	for (i = 0; i < filemap->narray; i++)
+	{
+		file_entry_t *entry = filemap->array[i];
+
+		entry->action = decide_file_action(entry);
+	}
+
+	/* Sort the actions to the order that they should be performed */
+	qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
+		  final_filemap_cmp);
+}
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 0cb7425170c..a5e8df57f40 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -14,17 +14,21 @@
 
 /*
  * For every file found in the local or remote system, we have a file entry
- * which says what we are going to do with the file. For relation files,
- * there is also a page map, marking pages in the file that were changed
- * locally.
- *
- * The enum values are sorted in the order we want actions to be processed.
+ * that contains information about the file on both systems.  For relation
+ * files, there is also a page map that marks pages in the file that were
+ * changed in the target after the last common checkpoint.  Each entry also
+ * contains an 'action' field, which says what we are going to do with the
+ * file.
  */
+
+/* these enum values are sorted in the order we want actions to be processed */
 typedef enum
 {
+	FILE_ACTION_UNDECIDED = 0,	/* not decided yet */
+
 	FILE_ACTION_CREATE,			/* create local directory or symbolic link */
 	FILE_ACTION_COPY,			/* copy whole file, overwriting if exists */
-	FILE_ACTION_COPY_TAIL,		/* copy tail from 'oldsize' to 'newsize' */
+	FILE_ACTION_COPY_TAIL,		/* copy tail from 'target_size' to 'source_size' */
 	FILE_ACTION_NONE,			/* no action (we might still copy modified
 								 * blocks based on the parsed WAL) */
 	FILE_ACTION_TRUNCATE,		/* truncate local file to 'newsize' bytes */
@@ -33,6 +37,8 @@ typedef enum
 
 typedef enum
 {
+	FILE_TYPE_UNDEFINED = 0,
+
 	FILE_TYPE_REGULAR,
 	FILE_TYPE_DIRECTORY,
 	FILE_TYPE_SYMLINK
@@ -41,19 +47,30 @@ typedef enum
 typedef struct file_entry_t
 {
 	char	   *path;
-	file_type_t type;
+	bool		isrelfile;		/* is it a relation data file? */
 
-	file_action_t action;
+	/*
+	 * Status of the file in the target.
+	 */
+	bool		target_exists;
+	file_type_t target_type;
+	size_t		target_size; /* for a regular file */
+	char	   *target_link_target; /* for a symlink */
 
-	/* for a regular file */
-	size_t		oldsize;
-	size_t		newsize;
-	bool		isrelfile;		/* is it a relation data file? */
+	datapagemap_t target_modified_pages;
 
-	datapagemap_t pagemap;
+	/*
+	 * Status of the file in the source.
+	 */
+	bool		source_exists;
+	file_type_t source_type;
+	size_t		source_size;
+	char	   *source_link_target; /* for a symlink */
 
-	/* for a symlink */
-	char	   *link_target;
+	/*
+	 * What will we do to the file?
+	 */
+	file_action_t action;
 
 	struct file_entry_t *next;
 } file_entry_t;
@@ -70,20 +87,19 @@ typedef struct filemap_t
 
 	/*
 	 * After processing all the remote files, the entries in the linked list
-	 * are moved to this array. After processing local files, too, all the
+	 * are moved to this array.  After processing local files, too, all the
 	 * local entries are added to the array by filemap_finalize, and sorted in
-	 * the final order. After filemap_finalize, all the entries are in the
+	 * the final order.  After filemap_finalize, all the entries are in the
 	 * array, and the linked list is empty.
 	 */
 	file_entry_t **array;
 	int			narray;			/* current length of array */
 
 	/*
-	 * Summary information. total_size is the total size of the source
-	 * cluster, and fetch_size is the number of bytes that needs to be copied.
+	 * Summary information.
 	 */
-	uint64		total_size;
-	uint64		fetch_size;
+	uint64		total_size;		/* total size of the source cluster */
+	uint64		fetch_size;		/* number of bytes that needs to be copied */
 } filemap_t;
 
 extern filemap_t *filemap;
@@ -94,11 +110,12 @@ extern void print_filemap(void);
 
 /* Functions for populating the filemap */
 extern void process_source_file(const char *path, file_type_t type,
-								size_t newsize, const char *link_target);
+								size_t size, const char *link_target);
 extern void process_target_file(const char *path, file_type_t type,
-								size_t newsize, const char *link_target);
-extern void process_block_change(ForkNumber forknum, RelFileNode rnode,
-								 BlockNumber blkno);
+								size_t size, const char *link_target);
+extern void process_target_wal_block_change(ForkNumber forknum,
+											RelFileNode rnode,
+											BlockNumber blkno);
 extern void filemap_finalize(void);
 
 #endif							/* FILEMAP_H */
diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c
index bf4dfc23b96..7fc9161b8c8 100644
--- a/src/bin/pg_rewind/libpq_fetch.c
+++ b/src/bin/pg_rewind/libpq_fetch.c
@@ -465,7 +465,7 @@ libpq_executeFileMap(filemap_t *map)
 		entry = map->array[i];
 
 		/* If this is a relation file, copy the modified blocks */
-		execute_pagemap(&entry->pagemap, entry->path);
+		execute_pagemap(&entry->target_modified_pages, entry->path);
 
 		switch (entry->action)
 		{
@@ -476,15 +476,15 @@ libpq_executeFileMap(filemap_t *map)
 			case FILE_ACTION_COPY:
 				/* Truncate the old file out of the way, if any */
 				open_target_file(entry->path, true);
-				fetch_file_range(entry->path, 0, entry->newsize);
+				fetch_file_range(entry->path, 0, entry->source_size);
 				break;
 
 			case FILE_ACTION_TRUNCATE:
-				truncate_target_file(entry->path, entry->newsize);
+				truncate_target_file(entry->path, entry->source_size);
 				break;
 
 			case FILE_ACTION_COPY_TAIL:
-				fetch_file_range(entry->path, entry->oldsize, entry->newsize);
+				fetch_file_range(entry->path, entry->target_size, entry->source_size);
 				break;
 
 			case FILE_ACTION_REMOVE:
@@ -494,6 +494,10 @@ libpq_executeFileMap(filemap_t *map)
 			case FILE_ACTION_CREATE:
 				create_target(entry);
 				break;
+
+			case FILE_ACTION_UNDECIDED:
+				pg_fatal("no action decided for \"%s\"", entry->path);
+				break;
 		}
 	}
 
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 2229c86f9af..2baeb74ae93 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -436,6 +436,6 @@ extractPageInfo(XLogReaderState *record)
 		if (forknum != MAIN_FORKNUM)
 			continue;
 
-		process_block_change(forknum, rnode, blkno);
+		process_target_wal_block_change(forknum, rnode, blkno);
 	}
 }
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index c9b9e480c0f..210984d302b 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -369,7 +369,7 @@ main(int argc, char **argv)
 				chkpttli);
 
 	/*
-	 * Build the filemap, by comparing the source and target data directories.
+	 * Collect information about all files in the target and source systems.
 	 */
 	filemap_create();
 	if (showprogress)
@@ -390,8 +390,12 @@ main(int argc, char **argv)
 		pg_log_info("reading WAL in target");
 	extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
 				   ControlFile_target.checkPoint, restore_command);
-	filemap_finalize();
 
+	/*
+	 * We have collected all information we need from both systems. Decide
+	 * what to do with each file.
+	 */
+	filemap_finalize();
 	if (showprogress)
 		calculate_totals();
 
-- 
2.20.1

