*** a/doc/src/sgml/ref/pg_rewind.sgml
--- b/doc/src/sgml/ref/pg_rewind.sgml
***************
*** 50,61 **** PostgreSQL documentation
The result is equivalent to replacing the target data directory with the
source one. Only changed blocks from relation files are copied;
! all other files are copied in full, including configuration files. The
! advantage of <application>pg_rewind</> over taking a new base backup, or
! tools like <application>rsync</>, is that <application>pg_rewind</> does
! not require reading through unchanged blocks in the cluster. This makes
! it a lot faster when the database is large and only a small
! fraction of blocks differ between the clusters.
--- 50,63 ----
The result is equivalent to replacing the target data directory with the
source one. Only changed blocks from relation files are copied;
! all other files except WAL are copied in full, including configuration
! files. Only the WAL files between the point of divergence and the current
! WAL insert location of the source server are copied; other WAL files are
! useless for the target server. The advantage of <application>pg_rewind</>
! over taking a new base backup, or tools like <application>rsync</>,
! is that <application>pg_rewind</> does not require reading through unchanged
! blocks in the cluster. This makes it a lot faster when the database is
! large and only a small fraction of blocks differ between the clusters.
***************
*** 231,237 **** PostgreSQL documentation
Copy all other files such as pg_xact and
configuration files from the source cluster to the target cluster
! (everything except the relation files).
--- 233,239 ----
Copy all other files such as pg_xact and
configuration files from the source cluster to the target cluster
! (everything except the relation files and some WAL files).
*** a/src/bin/pg_rewind/filemap.c
--- b/src/bin/pg_rewind/filemap.c
***************
*** 21,26 ****
--- 21,27 ----
#include "common/string.h"
#include "catalog/pg_tablespace.h"
#include "storage/fd.h"
+ #include "access/xlog_internal.h"
filemap_t *filemap = NULL;
***************
*** 67,72 **** process_source_file(const char *path, file_type_t type, size_t newsize,
--- 68,75 ----
file_action_t action = FILE_ACTION_NONE;
size_t oldsize = 0;
file_entry_t *entry;
+ uint32 tli;
+ XLogSegNo segno;
Assert(map->array == NULL);
***************
*** 168,179 **** process_source_file(const char *path, file_type_t type, size_t newsize,
--- 171,201 ----
*
* An exception: PG_VERSIONs should be identical, but avoid
* overwriting it for paranoia.
+ *
+ * Another exception: for performance reasons, do not copy WAL files
+ * that precede the point of divergence or that follow the current WAL
+ * insert location of the source server.
*/
if (pg_str_endswith(path, "PG_VERSION"))
{
action = FILE_ACTION_NONE;
oldsize = statbuf.st_size;
}
+ else if (strncmp(path, XLOGDIR "/", strlen(XLOGDIR "/")) == 0 &&
+ IsXLogFileName(path + strlen(XLOGDIR "/")))
+ {
+ XLogFromFileName(path + strlen(XLOGDIR "/"), &tli, &segno, WalSegSz);
+ if (segno >= divergence_segno && segno <= last_source_segno)
+ {
+ action = FILE_ACTION_COPY;
+ oldsize = 0;
+ }
+ else
+ {
+ action = FILE_ACTION_NONE;
+ oldsize = exists ? statbuf.st_size : 0;
+ }
+ }
else
{
action = FILE_ACTION_COPY;
***************
*** 258,263 **** process_target_file(const char *path, file_type_t type, size_t oldsize,
--- 280,288 ----
file_entry_t *key_ptr;
filemap_t *map = filemap;
file_entry_t *entry;
+ bool reserve = false;
+ uint32 tli;
+ XLogSegNo segno;
snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
if (lstat(localpath, &statbuf) < 0)
***************
*** 303,310 **** process_target_file(const char *path, file_type_t type, size_t oldsize,
exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
path_cmp) != NULL);
! /* Remove any file or folder that doesn't exist in the source system. */
! if (!exists)
{
entry = pg_malloc(sizeof(file_entry_t));
entry->path = pg_strdup(path);
--- 328,346 ----
exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
path_cmp) != NULL);
! if(strncmp(path, XLOGDIR "/", strlen(XLOGDIR "/")) == 0 &&
! IsXLogFileName(path + strlen(XLOGDIR "/")))
! {
! XLogFromFileName(path + strlen(XLOGDIR "/"), &tli, &segno, WalSegSz);
! if(segno < divergence_segno)
! reserve = true;
! }
!
! /*
! * Remove any file or folder that doesn't exist in the source system
! * except the WAL files before the divergence.
! */
! if (!exists && !reserve)
{
entry = pg_malloc(sizeof(file_entry_t));
entry->path = pg_strdup(path);
*** a/src/bin/pg_rewind/pg_rewind.c
--- b/src/bin/pg_rewind/pg_rewind.c
***************
*** 27,32 ****
--- 27,33 ----
#include "common/restricted_token.h"
#include "getopt_long.h"
#include "storage/bufpage.h"
+ #include "access/xlog_internal.h"
static void usage(const char *progname);
***************
*** 59,64 **** bool dry_run = false;
--- 60,69 ----
TimeLineHistoryEntry *targetHistory;
int targetNentries;
+ /* WAL location */
+ XLogSegNo divergence_segno;
+ XLogSegNo last_source_segno;
+
static void
usage(const char *progname)
{
***************
*** 281,286 **** main(int argc, char **argv)
--- 286,313 ----
chkpttli);
/*
+ * Save the WAL segment numbers of the divergence and the current WAL insert
+ * location of the source server. Only the WAL files between those two
+ * segments are copied to the target data directory later.
+ *
+ * Note: WAL files generated on the source server after this point but
+ * before the copy of the data files finishes must be made available when
+ * the target server is started. This can be done by configuring the target
+ * server as a standby of the source server.
+ */
+ if (connstr_source)
+ {
+ endrec = libpqGetCurrentXlogInsertLocation();
+ }
+ else
+ {
+ endrec = ControlFile_source.checkPoint;
+ }
+
+ XLByteToSeg(divergerec, divergence_segno, WalSegSz);
+ XLByteToPrevSeg(endrec, last_source_segno, WalSegSz);
+
+ /*
* Build the filemap, by comparing the source and target data directories.
*/
filemap_create();
*** a/src/bin/pg_rewind/pg_rewind.h
--- b/src/bin/pg_rewind/pg_rewind.h
***************
*** 30,35 **** extern int WalSegSz;
--- 30,39 ----
extern TimeLineHistoryEntry *targetHistory;
extern int targetNentries;
+ /* WAL location */
+ extern XLogSegNo divergence_segno;
+ extern XLogSegNo last_source_segno;
+
/* in parsexlog.c */
extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
int tliIndex, XLogRecPtr endpoint);
*** /dev/null
--- b/src/bin/pg_rewind/t/006_wal_copy.pl
***************
*** 0 ****
--- 1,129 ----
+ #
+ # Test that pg_rewind copies only the needed WAL files from the source.
+ #
+ use strict;
+ use warnings;
+ use TestLib;
+ use Test::More tests => 12;
+
+ use RewindTest;
+
+ sub run_test
+ {
+ my $test_mode = shift;
+
+ RewindTest::setup_cluster($test_mode);
+ RewindTest::start_master();
+
+ # Setup parameters for WAL reclaim
+ master_psql("ALTER SYSTEM SET checkpoint_timeout = '1d'");
+ master_psql("ALTER SYSTEM SET min_wal_size = '80MB'");
+ master_psql("ALTER SYSTEM SET wal_keep_segments = 4");
+ master_psql("ALTER SYSTEM SET log_checkpoints = on");
+ master_psql("SELECT pg_reload_conf()");
+
+ RewindTest::create_standby($test_mode);
+
+ # Create a test table and insert rows in master.
+ master_psql("CREATE TABLE tbl1 (d text)");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 1')");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 2')");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 3')");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 4')");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 5, checkpoint')");
+ master_psql("CHECKPOINT");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 6, before promotion')");
+
+ # Promote standby
+ my $master_divergence_wal = $node_master->safe_psql("postgres",
+ "SELECT pg_walfile_name(pg_current_wal_insert_lsn())");
+ RewindTest::promote_standby();
+
+ # Insert rows in master after promotion
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 6, after promotion')");
+
+ master_psql("SELECT pg_switch_wal()");
+ master_psql("INSERT INTO tbl1 VALUES ('in master, wal 7, after promotion')");
+
+ master_psql("CHECKPOINT");
+
+ # Insert rows in standby after promotion
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, wal 6, after promotion')");
+
+ standby_psql("SELECT pg_switch_wal()");
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, wal 7, after promotion')");
+
+ standby_psql("SELECT pg_switch_wal()");
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, wal 8, after promotion')");
+
+ standby_psql("CHECKPOINT");
+
+ # Check WALs before running pg_rewind
+ master_psql("SELECT * from pg_ls_waldir()");
+ print("master_divergence_wal: $master_divergence_wal\n");
+ my $master_wal_count_before_divergence = $node_master->safe_psql("postgres",
+ "SELECT count(*) FROM pg_ls_waldir() WHERE name ~ '^[0-9A-F]{24}\$' AND name < '$master_divergence_wal'");
+ ok( $master_wal_count_before_divergence > 0, 'master_wal_count_before_divergence > 0');
+
+ standby_psql("SELECT * from pg_ls_waldir()");
+ my $standby_current_wal = $node_standby->safe_psql("postgres",
+ "SELECT pg_walfile_name(pg_current_wal_insert_lsn())");
+ print("standby_current_wal: $standby_current_wal\n");
+ my $standby_reclaimed_wal_count = $node_standby->safe_psql("postgres",
+ "SELECT count(*) FROM pg_ls_waldir() WHERE name ~ '^[0-9A-F]{24}\$' AND name > '$standby_current_wal'");
+ ok( $standby_reclaimed_wal_count > 0, 'standby_reclaimed_wal_count > 0');
+
+ # The modification time reported by pg_ls_waldir() has one-second resolution, so sleep one second
+ sleep(1);
+ my $pg_rewind_time = $node_master->safe_psql("postgres", "SELECT now()");
+ print("pg_rewind_time: $pg_rewind_time\n");
+
+ # Run pg_rewind and check
+ RewindTest::run_pg_rewind($test_mode);
+
+ master_psql("SELECT * from pg_ls_waldir()");
+
+ check_query(
+ 'SELECT * FROM tbl1',
+ qq(in master, wal 1
+ in master, wal 2
+ in master, wal 3
+ in master, wal 4
+ in master, wal 5, checkpoint
+ in master, wal 6, before promotion
+ in standby, wal 6, after promotion
+ in standby, wal 7, after promotion
+ in standby, wal 8, after promotion
+ ),
+ 'table content');
+
+ check_query(
+ "SELECT count(*) FROM pg_ls_waldir() WHERE name ~ '^[0-9A-F]{24}\$' AND name < '$master_divergence_wal' AND modification >= '$pg_rewind_time'",
+ qq(0
+ ),
+ 'do not copy WALs before divergence');
+
+ check_query(
+ "SELECT count(*) FROM pg_ls_waldir() WHERE name ~ '^[0-9A-F]{24}\$' AND name > '$standby_current_wal' AND modification >= '$pg_rewind_time'",
+ qq(0
+ ),
+ 'do not copy reclaimed WALs from the source server');
+
+ RewindTest::clean_rewind_test();
+ }
+
+ # Run the test in both modes
+ run_test('local');
+ run_test('remote');
+
+ exit(0);