From f7c4bf1c055149e8e32ccca20ecc4a9af363a423 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 19 Aug 2020 15:34:39 +0300
Subject: [PATCH v2 3/5] pg_rewind: Replace the hybrid list+array data
 structure with simplehash.

Now that simplehash can be use in frontend code, let's make use of it.
---
 src/bin/pg_rewind/copy_fetch.c  |   4 +-
 src/bin/pg_rewind/fetch.c       |   2 +-
 src/bin/pg_rewind/fetch.h       |   2 +-
 src/bin/pg_rewind/filemap.c     | 285 ++++++++++++++------------------
 src/bin/pg_rewind/filemap.h     |  67 +++-----
 src/bin/pg_rewind/libpq_fetch.c |   4 +-
 src/bin/pg_rewind/pg_rewind.c   |  12 +-
 7 files changed, 168 insertions(+), 208 deletions(-)

diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c
index 18fad32600e..61aed8018b6 100644
--- a/src/bin/pg_rewind/copy_fetch.c
+++ b/src/bin/pg_rewind/copy_fetch.c
@@ -207,9 +207,9 @@ copy_executeFileMap(filemap_t *map)
 	file_entry_t *entry;
 	int			i;
 
-	for (i = 0; i < map->narray; i++)
+	for (i = 0; i < map->nactions; i++)
 	{
-		entry = map->array[i];
+		entry = map->actions[i];
 		execute_pagemap(&entry->target_modified_pages, entry->path);
 
 		switch (entry->action)
diff --git a/src/bin/pg_rewind/fetch.c b/src/bin/pg_rewind/fetch.c
index f18fe5386ed..f41d0f295ea 100644
--- a/src/bin/pg_rewind/fetch.c
+++ b/src/bin/pg_rewind/fetch.c
@@ -37,7 +37,7 @@ fetchSourceFileList(void)
  * Fetch all relation data files that are marked in the given data page map.
  */
 void
-executeFileMap(void)
+execute_file_actions(filemap_t *filemap)
 {
 	if (datadir_source)
 		copy_executeFileMap(filemap);
diff --git a/src/bin/pg_rewind/fetch.h b/src/bin/pg_rewind/fetch.h
index 7cf8b6ea090..b20df8b1537 100644
--- a/src/bin/pg_rewind/fetch.h
+++ b/src/bin/pg_rewind/fetch.h
@@ -25,7 +25,7 @@
  */
 extern void fetchSourceFileList(void);
 extern char *fetchFile(const char *filename, size_t *filesize);
-extern void executeFileMap(void);
+extern void execute_file_actions(filemap_t *filemap);
 
 /* in libpq_fetch.c */
 extern void libpqProcessFileList(void);
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 7971daeda5e..e6e037d1cd4 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -3,6 +3,19 @@
  * filemap.c
  *	  A data structure for keeping track of files that have changed.
  *
+ * This source file contains the logic to decide what to do with different
+ * kinds of files, and the data structure to support it.  Before modifying
+ * anything, pg_rewind collects information about all the files and their
+ * attributes in the target and source data directories.  It also scans the
+ * WAL log in the target, and collects information about data blocks that
+ * were changed.  All this information is stored in a hash table, using the
+ * file path, relative to the root of the data directory, as the key.
+ *
+ * After collecting all the information required, the filemap_finalize()
+ * function scans the hash table and decides what action needs to be taken
+ * for each file.  Finally, it sorts the array to the final order that the
+ * actions should be executed in.
+ *
  * Copyright (c) 2013-2020, PostgreSQL Global Development Group
  *
  *-------------------------------------------------------------------------
@@ -14,22 +27,39 @@
 #include <unistd.h>
 
 #include "catalog/pg_tablespace_d.h"
+#include "common/hashfn.h"
 #include "common/string.h"
 #include "datapagemap.h"
 #include "filemap.h"
 #include "pg_rewind.h"
 #include "storage/fd.h"
 
-filemap_t  *filemap = NULL;
+/*
+ * Define a hash table which we can use to store information about the files
+ * mentioned in the backup manifest.
+ */
+static uint32 hash_string_pointer(const char *s);
+#define SH_PREFIX		filehash
+#define SH_ELEMENT_TYPE	file_entry_t
+#define SH_KEY_TYPE		const char *
+#define	SH_KEY			path
+#define SH_HASH_KEY(tb, key)	hash_string_pointer(key)
+#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
+#define	SH_SCOPE		static inline
+#define SH_RAW_ALLOCATOR	pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static filehash_hash *filehash;
 
 static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
 						 BlockNumber segno);
-static int	path_cmp(const void *a, const void *b);
 
-static file_entry_t *get_filemap_entry(const char *path, bool create);
+static file_entry_t *insert_filehash_entry(const char *path);
+static file_entry_t *lookup_filehash_entry(const char *path);
 static int	final_filemap_cmp(const void *a, const void *b);
-static void filemap_list_to_array(filemap_t *map);
 static bool check_file_excluded(const char *path, bool is_source);
 
 /*
@@ -131,54 +161,26 @@ static const struct exclude_list_item excludeFiles[] =
 };
 
 /*
- * Create a new file map (stored in the global pointer "filemap").
+ * Initialize the hash table for the file map.
  */
 void
-filemap_create(void)
+filemap_init(void)
 {
-	filemap_t  *map;
-
-	map = pg_malloc(sizeof(filemap_t));
-	map->first = map->last = NULL;
-	map->nlist = 0;
-	map->array = NULL;
-	map->narray = 0;
-
-	Assert(filemap == NULL);
-	filemap = map;
+	filehash = filehash_create(1000, NULL);
 }
 
-/* Look up or create entry for 'path' */
+/* Look up entry for 'path', creating new one if it doesn't exists */
 static file_entry_t *
-get_filemap_entry(const char *path, bool create)
+insert_filehash_entry(const char *path)
 {
-	filemap_t  *map = filemap;
 	file_entry_t *entry;
-	file_entry_t **e;
-	file_entry_t key;
-	file_entry_t *key_ptr;
-
-	if (map->array)
-	{
-		key.path = (char *) path;
-		key_ptr = &key;
-		e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
-					path_cmp);
-	}
-	else
-		e = NULL;
+	bool		found;
 
-	if (e)
-		entry = *e;
-	else if (!create)
-		entry = NULL;
-	else
+	entry = filehash_insert(filehash, path, &found);
+	if (!found)
 	{
-		/* Create a new entry for this file */
-		entry = pg_malloc(sizeof(file_entry_t));
 		entry->path = pg_strdup(path);
 		entry->isrelfile = isRelDataFile(path);
-		entry->action = FILE_ACTION_UNDECIDED;
 
 		entry->target_exists = false;
 		entry->target_type = FILE_TYPE_UNDEFINED;
@@ -192,21 +194,18 @@ get_filemap_entry(const char *path, bool create)
 		entry->source_size = 0;
 		entry->source_link_target = NULL;
 
-		entry->next = NULL;
-
-		if (map->last)
-		{
-			map->last->next = entry;
-			map->last = entry;
-		}
-		else
-			map->first = map->last = entry;
-		map->nlist++;
+		entry->action = FILE_ACTION_UNDECIDED;
 	}
 
 	return entry;
 }
 
+static file_entry_t *
+lookup_filehash_entry(const char *path)
+{
+	return filehash_lookup(filehash, path);
+}
+
 /*
  * Callback for processing source file list.
  *
@@ -220,8 +219,6 @@ process_source_file(const char *path, file_type_t type, size_t size,
 {
 	file_entry_t *entry;
 
-	Assert(filemap->array == NULL);
-
 	/*
 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
 	 * don't want to mess with the symlink itself, nor complain if it's a
@@ -238,7 +235,9 @@ process_source_file(const char *path, file_type_t type, size_t size,
 		pg_fatal("data file \"%s\" in source is not a regular file", path);
 
 	/* Remember this source file */
-	entry = get_filemap_entry(path, true);
+	entry = insert_filehash_entry(path);
+	if (entry->source_exists)
+		pg_fatal("duplicate source file \"%s\"", path);
 	entry->source_exists = true;
 	entry->source_type = type;
 	entry->source_size = size;
@@ -256,7 +255,6 @@ void
 process_target_file(const char *path, file_type_t type, size_t size,
 					const char *link_target)
 {
-	filemap_t  *map = filemap;
 	file_entry_t *entry;
 
 	/*
@@ -264,21 +262,6 @@ process_target_file(const char *path, file_type_t type, size_t size,
 	 * from the target data folder all paths which have been filtered out from
 	 * the source data folder when processing the source files.
 	 */
-	if (map->array == NULL)
-	{
-		/* on first call, initialize lookup array */
-		if (map->nlist == 0)
-		{
-			/* should not happen */
-			pg_fatal("source file list is empty");
-		}
-
-		filemap_list_to_array(map);
-
-		Assert(map->array != NULL);
-
-		qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
-	}
 
 	/*
 	 * Like in process_source_file, pretend that pg_wal is always a directory.
@@ -287,7 +270,9 @@ process_target_file(const char *path, file_type_t type, size_t size,
 		type = FILE_TYPE_DIRECTORY;
 
 	/* Remember this target file */
-	entry = get_filemap_entry(path, true);
+	entry = insert_filehash_entry(path);
+	if (entry->target_exists)
+		pg_fatal("duplicate source file \"%s\"", path);
 	entry->target_exists = true;
 	entry->target_type = type;
 	entry->target_size = size;
@@ -300,7 +285,7 @@ process_target_file(const char *path, file_type_t type, size_t size,
  * changed blocks in the pagemap of the file.
  *
  * NOTE: All the files on both systems must have already been added to the
- * file map!
+ * hash table!
  */
 void
 process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
@@ -311,47 +296,45 @@ process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
 	BlockNumber blkno_inseg;
 	int			segno;
 
-	Assert(filemap->array);
-
 	segno = blkno / RELSEG_SIZE;
 	blkno_inseg = blkno % RELSEG_SIZE;
 
 	path = datasegpath(rnode, forknum, segno);
-	entry = get_filemap_entry(path, false);
+	entry = lookup_filehash_entry(path);
 	pfree(path);
 
+	/*
+	 * If the block still exists in both systems, remember it. Otherwise we
+	 * can safely ignore it.
+	 *
+	 * If the block is beyond the EOF in the source system, or the file doesn't
+	 * doesn'exist in the source at all, we're going to truncate/remove it away
+	 * from the target anyway. Likewise, if it doesn't exist in the target
+	 * anymore, we will copy it over with the "tail" from the source system,
+	 * anyway.
+	 *
+	 * It is possible to find WAL for a file that doesn't exist on either
+	 * system anymore. It means that the relation was dropped later in the
+	 * target system, and independently on the source system too, or that
+	 * it was created and dropped in the target system and it never existed
+	 * in the source. Either way, we can safely ignore it.
+	 */
 	if (entry)
 	{
-		int64		end_offset;
-
 		Assert(entry->isrelfile);
 
 		if (entry->target_type != FILE_TYPE_REGULAR)
 			pg_fatal("unexpected page modification for directory or symbolic link \"%s\"",
 					 entry->path);
 
-		/*
-		 * If the block beyond the EOF in the source system, no need to
-		 * remember it now, because we're going to truncate it away from the
-		 * target anyway. Also no need to remember the block if it's beyond
-		 * the current EOF in the target system; we will copy it over with the
-		 * "tail" from the source system, anyway.
-		 */
-		end_offset = (blkno_inseg + 1) * BLCKSZ;
-		if (end_offset <= entry->source_size &&
-			end_offset <= entry->target_size)
-			datapagemap_add(&entry->target_modified_pages, blkno_inseg);
-	}
-	else
-	{
-		/*
-		 * If we don't have any record of this file in the file map, it means
-		 * that it's a relation that doesn't exist in the source system.  It
-		 * could exist in the target system; we haven't moved the target-only
-		 * entries from the linked list to the array yet!  But in any case, if
-		 * it doesn't exist in the source it will be removed from the target
-		 * too, and we can safely ignore it.
-		 */
+		if (entry->target_exists && entry->source_exists)
+		{
+			off_t		end_offset;
+
+			end_offset = (blkno_inseg + 1) * BLCKSZ;
+			if (end_offset <= entry->source_size && end_offset <= entry->target_size)
+				datapagemap_add(&entry->target_modified_pages, blkno_inseg);
+		}
 	}
 }
 
@@ -413,34 +396,6 @@ check_file_excluded(const char *path, bool is_source)
 	return false;
 }
 
-/*
- * Convert the linked list of entries in map->first/last to the array,
- * map->array.
- */
-static void
-filemap_list_to_array(filemap_t *map)
-{
-	int			narray;
-	file_entry_t *entry,
-			   *next;
-
-	map->array = (file_entry_t **)
-		pg_realloc(map->array,
-				   (map->nlist + map->narray) * sizeof(file_entry_t *));
-
-	narray = map->narray;
-	for (entry = map->first; entry != NULL; entry = next)
-	{
-		map->array[narray++] = entry;
-		next = entry->next;
-		entry->next = NULL;
-	}
-	Assert(narray == map->nlist + map->narray);
-	map->narray = narray;
-	map->nlist = 0;
-	map->first = map->last = NULL;
-}
-
 static const char *
 action_to_str(file_action_t action)
 {
@@ -468,32 +423,31 @@ action_to_str(file_action_t action)
  * Calculate the totals needed for progress reports.
  */
 void
-calculate_totals(void)
+calculate_totals(filemap_t *filemap)
 {
 	file_entry_t *entry;
 	int			i;
-	filemap_t  *map = filemap;
 
-	map->total_size = 0;
-	map->fetch_size = 0;
+	filemap->total_size = 0;
+	filemap->fetch_size = 0;
 
-	for (i = 0; i < map->narray; i++)
+	for (i = 0; i < filemap->nactions; i++)
 	{
-		entry = map->array[i];
+		entry = filemap->actions[i];
 
 		if (entry->source_type != FILE_TYPE_REGULAR)
 			continue;
 
-		map->total_size += entry->source_size;
+		filemap->total_size += entry->source_size;
 
 		if (entry->action == FILE_ACTION_COPY)
 		{
-			map->fetch_size += entry->source_size;
+			filemap->fetch_size += entry->source_size;
 			continue;
 		}
 
 		if (entry->action == FILE_ACTION_COPY_TAIL)
-			map->fetch_size += (entry->source_size - entry->target_size);
+			filemap->fetch_size += (entry->source_size - entry->target_size);
 
 		if (entry->target_modified_pages.bitmapsize > 0)
 		{
@@ -502,7 +456,7 @@ calculate_totals(void)
 
 			iter = datapagemap_iterate(&entry->target_modified_pages);
 			while (datapagemap_next(iter, &blk))
-				map->fetch_size += BLCKSZ;
+				filemap->fetch_size += BLCKSZ;
 
 			pg_free(iter);
 		}
@@ -510,15 +464,14 @@ calculate_totals(void)
 }
 
 void
-print_filemap(void)
+print_filemap(filemap_t *filemap)
 {
-	filemap_t  *map = filemap;
 	file_entry_t *entry;
 	int			i;
 
-	for (i = 0; i < map->narray; i++)
+	for (i = 0; i < filemap->nactions; i++)
 	{
-		entry = map->array[i];
+		entry = filemap->actions[i];
 		if (entry->action != FILE_ACTION_NONE ||
 			entry->target_modified_pages.bitmapsize > 0)
 		{
@@ -640,15 +593,6 @@ datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
 		return path;
 }
 
-static int
-path_cmp(const void *a, const void *b)
-{
-	file_entry_t *fa = *((file_entry_t **) a);
-	file_entry_t *fb = *((file_entry_t **) b);
-
-	return strcmp(fa->path, fb->path);
-}
-
 /*
  * In the final stage, the filemap is sorted so that removals come last.
  * From disk space usage point of view, it would be better to do removals
@@ -834,21 +778,48 @@ decide_file_action(file_entry_t *entry)
 /*
  * Decide what to do with each file.
  */
-void
+filemap_t *
 filemap_finalize()
 {
 	int			i;
+	filehash_iterator it;
+	file_entry_t *entry;
+	filemap_t  *filemap;
 
-	filemap_list_to_array(filemap);
-
-	for (i = 0; i < filemap->narray; i++)
+	filehash_start_iterate(filehash, &it);
+	while ((entry = filehash_iterate(filehash, &it)) != NULL)
 	{
-		file_entry_t *entry = filemap->array[i];
-
 		entry->action = decide_file_action(entry);
 	}
 
-	/* Sort the actions to the order that they should be performed */
-	qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
+	/*
+	 * Turn the hash table into an array, sorted in the order that the actions
+	 * should be performed.
+	 */
+	filemap = pg_malloc(offsetof(filemap_t, actions) +
+						filehash->members * sizeof(file_entry_t *));
+	filemap->nactions = filehash->members;
+	filehash_start_iterate(filehash, &it);
+	i = 0;
+	while ((entry = filehash_iterate(filehash, &it)) != NULL)
+	{
+		filemap->actions[i++] = entry;
+	}
+
+	qsort(&filemap->actions, filemap->nactions, sizeof(file_entry_t *),
 		  final_filemap_cmp);
+
+	return filemap;
+}
+
+
+/*
+ * Helper function for filemap hash table.
+ */
+static uint32
+hash_string_pointer(const char *s)
+{
+	unsigned char *ss = (unsigned char *) s;
+
+	return hash_bytes(ss, strlen(s));
 }
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index a5e8df57f40..3660ffe0990 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -12,15 +12,6 @@
 #include "storage/block.h"
 #include "storage/relfilenode.h"
 
-/*
- * For every file found in the local or remote system, we have a file entry
- * that contains information about the file on both systems.  For relation
- * files, there is also a page map that marks pages in the file that were
- * changed in the target after the last common checkpoint.  Each entry also
- * contains an 'action' field, which says what we are going to do with the
- * file.
- */
-
 /* these enum values are sorted in the order we want actions to be processed */
 typedef enum
 {
@@ -44,9 +35,21 @@ typedef enum
 	FILE_TYPE_SYMLINK
 } file_type_t;
 
+/*
+ * For every file found in the local or remote system, we have a file entry
+ * that contains information about the file on both systems.  For relation
+ * files, there is also a page map that marks pages in the file that were
+ * changed in the target after the last common checkpoint.
+ *
+ * When gathering information, these are kept in a hash table, private to
+ * filemap.c. filemap_finalize() fills in the 'action' field, sorts all the
+ * entries, and returns them in an array, ready for executing the actions.
+ */
 typedef struct file_entry_t
 {
-	char	   *path;
+	uint32		status;			/* hash status */
+
+	const char *path;
 	bool		isrelfile;		/* is it a relation data file? */
 
 	/*
@@ -71,44 +74,25 @@ typedef struct file_entry_t
 	 * What will we do to the file?
 	 */
 	file_action_t action;
-
-	struct file_entry_t *next;
 } file_entry_t;
 
+/*
+ * This represents the final decisions on what to do with each file.
+ * 'actions' array contains an entry for each file, sorted in the order
+ * that their actions should executed.
+ */
 typedef struct filemap_t
 {
-	/*
-	 * New entries are accumulated to a linked list, in process_source_file
-	 * and process_target_file.
-	 */
-	file_entry_t *first;
-	file_entry_t *last;
-	int			nlist;			/* number of entries currently in list */
-
-	/*
-	 * After processing all the remote files, the entries in the linked list
-	 * are moved to this array.  After processing local files, too, all the
-	 * local entries are added to the array by filemap_finalize, and sorted in
-	 * the final order.  After filemap_finalize, all the entries are in the
-	 * array, and the linked list is empty.
-	 */
-	file_entry_t **array;
-	int			narray;			/* current length of array */
-
-	/*
-	 * Summary information.
-	 */
+	/* Summary information, filled by calculate_totals() */
 	uint64		total_size;		/* total size of the source cluster */
 	uint64		fetch_size;		/* number of bytes that needs to be copied */
-} filemap_t;
 
-extern filemap_t *filemap;
-
-extern void filemap_create(void);
-extern void calculate_totals(void);
-extern void print_filemap(void);
+	int			nactions;		/* size of 'actions' array */
+	file_entry_t *actions[FLEXIBLE_ARRAY_MEMBER];
+} filemap_t;
 
 /* Functions for populating the filemap */
+extern void filemap_init(void);
 extern void process_source_file(const char *path, file_type_t type,
 								size_t size, const char *link_target);
 extern void process_target_file(const char *path, file_type_t type,
@@ -116,6 +100,9 @@ extern void process_target_file(const char *path, file_type_t type,
 extern void process_target_wal_block_change(ForkNumber forknum,
 											RelFileNode rnode,
 											BlockNumber blkno);
-extern void filemap_finalize(void);
+
+extern filemap_t *filemap_finalize(void);
+extern void calculate_totals(filemap_t *filemap);
+extern void print_filemap(filemap_t *filemap);
 
 #endif							/* FILEMAP_H */
diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c
index 7fc9161b8c8..9c541bb73d5 100644
--- a/src/bin/pg_rewind/libpq_fetch.c
+++ b/src/bin/pg_rewind/libpq_fetch.c
@@ -460,9 +460,9 @@ libpq_executeFileMap(filemap_t *map)
 				 PQresultErrorMessage(res));
 	PQclear(res);
 
-	for (i = 0; i < map->narray; i++)
+	for (i = 0; i < map->nactions; i++)
 	{
-		entry = map->array[i];
+		entry = map->actions[i];
 
 		/* If this is a relation file, copy the modified blocks */
 		execute_pagemap(&entry->target_modified_pages, entry->path);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 210984d302b..2bdeed26c53 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -129,6 +129,7 @@ main(int argc, char **argv)
 	TimeLineID	endtli;
 	ControlFileData ControlFile_new;
 	bool		writerecoveryconf = false;
+	filemap_t  *filemap;
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
@@ -371,10 +372,11 @@ main(int argc, char **argv)
 	/*
 	 * Collect information about all files in the target and source systems.
 	 */
-	filemap_create();
 	if (showprogress)
 		pg_log_info("reading source file list");
+	filemap_init();
 	fetchSourceFileList();
+
 	if (showprogress)
 		pg_log_info("reading target file list");
 	traverse_datadir(datadir_target, &process_target_file);
@@ -395,13 +397,13 @@ main(int argc, char **argv)
 	 * We have collected all information we need from both systems. Decide
 	 * what to do with each file.
 	 */
-	filemap_finalize();
+	filemap = filemap_finalize();
 	if (showprogress)
-		calculate_totals();
+		calculate_totals(filemap);
 
 	/* this is too verbose even for verbose mode */
 	if (debug)
-		print_filemap();
+		print_filemap(filemap);
 
 	/*
 	 * Ok, we're ready to start copying things over.
@@ -421,7 +423,7 @@ main(int argc, char **argv)
 	 * modified the target directory and there is no turning back!
 	 */
 
-	executeFileMap();
+	execute_file_actions(filemap);
 
 	progress_report(true);
 
-- 
2.20.1

