pg_upgrade with parallel tablespace copying
Pg_upgrade by default (without --link) copies heap/index files from the
old to new cluster. This patch implements parallel heap/index file
copying in pg_upgrade using the --jobs option. It uses the same
infrastructure used for pg_upgrade parallel dump/restore. Here are the
performance results:
--- seconds ---
GB git patched
2 62.09 63.75
4 95.93 107.22
8 194.96 195.29
16 494.38 348.93
32 983.28 644.23
64 2227.73 1244.08
128 4735.83 2547.09
Because of the kernel cache, you only see a big win when the amount of
copy data exceeds the kernel cache. For testing, I used a 24GB, 16-core
machine with two magnetic disks with one tablespace on each. Using more
tablespaces would yield larger improvements. My test script is
attached.
I consider this patch ready for application. This is the last
pg_upgrade performance improvement idea I am considering.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
Slightly modified patch applied. This is my last planned pg_upgrade
change for 9.3.
---------------------------------------------------------------------------
On Mon, Jan 7, 2013 at 10:51:21PM -0500, Bruce Momjian wrote:
Pg_upgrade by default (without --link) copies heap/index files from the
old to new cluster. This patch implements parallel heap/index file
copying in pg_upgrade using the --jobs option. It uses the same
infrastructure used for pg_upgrade parallel dump/restore. Here are the
performance results:

            --- seconds ---
     GB        git    patched
      2      62.09      63.75
      4      95.93     107.22
      8     194.96     195.29
     16     494.38     348.93
     32     983.28     644.23
     64    2227.73    1244.08
    128    4735.83    2547.09

Because of the kernel cache, you only see a big win when the amount of
copy data exceeds the kernel cache. For testing, I used a 24GB, 16-core
machine with two magnetic disks with one tablespace on each. Using more
tablespaces would yield larger improvements. My test script is
attached.

I consider this patch ready for application. This is the last
pg_upgrade performance improvement idea I am considering.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c new file mode 100644 index 59f8fd0..1780788 *** a/contrib/pg_upgrade/check.c --- b/contrib/pg_upgrade/check.c *************** create_script_for_old_cluster_deletion(c *** 606,612 **** fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata));/* delete old cluster's alternate tablespaces */ ! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++) { /* * Do the old cluster's per-database directories share a directory --- 606,612 ---- fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata));/* delete old cluster's alternate tablespaces */
! for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
{
/*
* Do the old cluster's per-database directories share a directory
*************** create_script_for_old_cluster_deletion(c
*** 621,634 ****
/* remove PG_VERSION? */
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
! fix_path_separator(os_info.tablespaces[tblnum]),
fix_path_separator(old_cluster.tablespace_suffix),
PATH_SEPARATOR);for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++) { fprintf(script, RMDIR_CMD " %s%s%c%d\n", ! fix_path_separator(os_info.tablespaces[tblnum]), fix_path_separator(old_cluster.tablespace_suffix), PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid); } --- 621,634 ---- /* remove PG_VERSION? */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) fprintf(script, RM_CMD " %s%s%cPG_VERSION\n", ! fix_path_separator(os_info.old_tablespaces[tblnum]), fix_path_separator(old_cluster.tablespace_suffix), PATH_SEPARATOR);for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
{
fprintf(script, RMDIR_CMD " %s%s%c%d\n",
! fix_path_separator(os_info.old_tablespaces[tblnum]),
fix_path_separator(old_cluster.tablespace_suffix),
PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid);
}
*************** create_script_for_old_cluster_deletion(c
*** 640,646 ****
* or a version-specific subdirectory.
*/
fprintf(script, RMDIR_CMD " %s%s\n",
! fix_path_separator(os_info.tablespaces[tblnum]),
fix_path_separator(old_cluster.tablespace_suffix));
}--- 640,646 ---- * or a version-specific subdirectory. */ fprintf(script, RMDIR_CMD " %s%s\n", ! fix_path_separator(os_info.old_tablespaces[tblnum]), fix_path_separator(old_cluster.tablespace_suffix)); }diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c new file mode 100644 index 0c11ff8..7fd4584 *** a/contrib/pg_upgrade/info.c --- b/contrib/pg_upgrade/info.c *************** create_rel_filename_map(const char *old_ *** 106,125 **** * relation belongs to the default tablespace, hence relfiles should * exist in the data directories. */ ! snprintf(map->old_dir, sizeof(map->old_dir), "%s/base/%u", old_data, ! old_db->db_oid); ! snprintf(map->new_dir, sizeof(map->new_dir), "%s/base/%u", new_data, ! new_db->db_oid); } else { /* relation belongs to a tablespace, so use the tablespace location */ ! snprintf(map->old_dir, sizeof(map->old_dir), "%s%s/%u", old_rel->tablespace, ! old_cluster.tablespace_suffix, old_db->db_oid); ! snprintf(map->new_dir, sizeof(map->new_dir), "%s%s/%u", new_rel->tablespace, ! new_cluster.tablespace_suffix, new_db->db_oid); }/* * old_relfilenode might differ from pg_class.oid (and hence * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL. --- 106,130 ---- * relation belongs to the default tablespace, hence relfiles should * exist in the data directories. */ ! strlcpy(map->old_tablespace, old_data, sizeof(map->old_tablespace)); ! strlcpy(map->new_tablespace, new_data, sizeof(map->new_tablespace)); ! strlcpy(map->old_tablespace_suffix, "/base", sizeof(map->old_tablespace_suffix)); ! strlcpy(map->new_tablespace_suffix, "/base", sizeof(map->new_tablespace_suffix)); } else { /* relation belongs to a tablespace, so use the tablespace location */ ! strlcpy(map->old_tablespace, old_rel->tablespace, sizeof(map->old_tablespace)); ! strlcpy(map->new_tablespace, new_rel->tablespace, sizeof(map->new_tablespace)); ! strlcpy(map->old_tablespace_suffix, old_cluster.tablespace_suffix, ! sizeof(map->old_tablespace_suffix)); ! 
strlcpy(map->new_tablespace_suffix, new_cluster.tablespace_suffix, ! sizeof(map->new_tablespace_suffix)); }+ map->old_db_oid = old_db->db_oid; + map->new_db_oid = new_db->db_oid; + /* * old_relfilenode might differ from pg_class.oid (and hence * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL. diff --git a/contrib/pg_upgrade/parallel.c b/contrib/pg_upgrade/parallel.c new file mode 100644 index 8ea36bc..d157511 *** a/contrib/pg_upgrade/parallel.c --- b/contrib/pg_upgrade/parallel.c *************** typedef struct { *** 34,44 **** char log_file[MAXPGPATH]; char opt_log_file[MAXPGPATH]; char cmd[MAX_STRING]; ! } thread_arg;! thread_arg **thread_args;
! DWORD win32_exec_prog(thread_arg *args);
#endif
--- 34,57 ---- char log_file[MAXPGPATH]; char opt_log_file[MAXPGPATH]; char cmd[MAX_STRING]; ! } exec_thread_arg;! typedef struct {
! DbInfoArr *old_db_arr;
! DbInfoArr *new_db_arr;
! char old_pgdata[MAXPGPATH];
! char new_pgdata[MAXPGPATH];
! char old_tablespace[MAXPGPATH];
! } transfer_thread_arg;! exec_thread_arg **exec_thread_args;
! transfer_thread_arg **transfer_thread_args;
!
! /* track current thread_args struct so reap_child() can be used for all cases */
! void **cur_thread_args;
!
! DWORD win32_exec_prog(exec_thread_arg *args);
! DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args);#endif
*************** parallel_exec_prog(const char *log_file,
*** 58,64 ****
pid_t child;
#else
HANDLE child;
! thread_arg *new_arg;
#endifva_start(args, fmt); --- 71,77 ---- pid_t child; #else HANDLE child; ! exec_thread_arg *new_arg; #endifva_start(args, fmt); *************** parallel_exec_prog(const char *log_file, *** 71,77 **** else { /* parallel */ ! /* harvest any dead children */ while (reap_child(false) == true) ; --- 84,92 ---- else { /* parallel */ ! #ifdef WIN32 ! cur_thread_args = (void **)exec_thread_args; ! #endif /* harvest any dead children */ while (reap_child(false) == true) ; *************** parallel_exec_prog(const char *log_file, *** 100,106 **** int i;thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE));
! thread_args = pg_malloc(user_opts.jobs * sizeof(thread_arg *));/* * For safety and performance, we keep the args allocated during --- 115,121 ---- int i;thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE));
! exec_thread_args = pg_malloc(user_opts.jobs * sizeof(exec_thread_arg *));/*
* For safety and performance, we keep the args allocated during
*************** parallel_exec_prog(const char *log_file,
*** 108,118 ****
* in a thread different from the one that allocated it.
*/
for (i = 0; i < user_opts.jobs; i++)
! thread_args[i] = pg_malloc(sizeof(thread_arg));
}/* use first empty array element */
! new_arg = thread_args[parallel_jobs-1];/* Can only pass one pointer into the function, so use a struct */ strcpy(new_arg->log_file, log_file); --- 123,133 ---- * in a thread different from the one that allocated it. */ for (i = 0; i < user_opts.jobs; i++) ! exec_thread_args[i] = pg_malloc(sizeof(exec_thread_arg)); }/* use first empty array element */
! new_arg = exec_thread_args[parallel_jobs-1];/* Can only pass one pointer into the function, so use a struct */
strcpy(new_arg->log_file, log_file);
*************** parallel_exec_prog(const char *log_file,
*** 134,140 ****#ifdef WIN32
DWORD
! win32_exec_prog(thread_arg *args)
{
int ret;--- 149,155 ----#ifdef WIN32
DWORD
! win32_exec_prog(exec_thread_arg *args)
{
int ret;*************** win32_exec_prog(thread_arg *args) *** 147,152 **** --- 162,273 ----/* + * parallel_transfer_all_new_dbs + * + * This has the same API as transfer_all_new_dbs, except it does parallel execution + * by transfering multiple tablespaces in parallel + */ + void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, + char *old_pgdata, char *new_pgdata, + char *old_tablespace) + { + #ifndef WIN32 + pid_t child; + #else + HANDLE child; + transfer_thread_arg *new_arg; + #endif + + if (user_opts.jobs <= 1) + /* throw_error must be true to allow jobs */ + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL); + else + { + /* parallel */ + #ifdef WIN32 + cur_thread_args = (void **)transfer_thread_args; + #endif + /* harvest any dead children */ + while (reap_child(false) == true) + ; + + /* must we wait for a dead child? */ + if (parallel_jobs >= user_opts.jobs) + reap_child(true); + + /* set this before we start the job */ + parallel_jobs++; + + /* Ensure stdio state is quiesced before forking */ + fflush(NULL); + + #ifndef WIN32 + child = fork(); + if (child == 0) + { + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, + old_tablespace); + /* if we take another exit path, it will be non-zero */ + /* use _exit to skip atexit() functions */ + _exit(0); + } + else if (child < 0) + /* fork failed */ + pg_log(PG_FATAL, "could not create worker process: %s\n", strerror(errno)); + #else + if (thread_handles == NULL) + { + int i; + + thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE)); + transfer_thread_args = pg_malloc(user_opts.jobs * sizeof(transfer_thread_arg *)); + + /* + * For safety and performance, we keep the args allocated during + * the entire life of the process, and we don't free the args + * in a thread different from the one that allocated it. 
+ */ + for (i = 0; i < user_opts.jobs; i++) + transfer_thread_args[i] = pg_malloc(sizeof(transfer_thread_arg)); + } + + /* use first empty array element */ + new_arg = transfer_thread_args[parallel_jobs-1]; + + /* Can only pass one pointer into the function, so use a struct */ + new_arg->old_db_arr = old_db_arr; + new_arg->new_db_arr = new_db_arr; + strcpy(new_arg->old_pgdata, old_pgdata); + strcpy(new_arg->new_pgdata, new_pgdata); + strcpy(new_arg->old_tablespace, old_tablespace); + + child = (HANDLE) _beginthreadex(NULL, 0, (void *) win32_exec_prog, + new_arg, 0, NULL); + if (child == 0) + pg_log(PG_FATAL, "could not create worker thread: %s\n", strerror(errno)); + + thread_handles[parallel_jobs-1] = child; + #endif + } + + return; + } + + + #ifdef WIN32 + DWORD + win32_transfer_all_new_dbs(transfer_thread_arg *args) + { + transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, args->old_pgdata, + args->new_pgdata, args->old_tablespace); + + /* terminates thread */ + return 0; + } + #endif + + + /* * collect status from a completed worker child */ bool *************** reap_child(bool wait_for_child) *** 195,201 **** /* Move last slot into dead child's position */ if (thread_num != parallel_jobs - 1) { ! thread_arg *tmp_args;thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
--- 316,322 ---- /* Move last slot into dead child's position */ if (thread_num != parallel_jobs - 1) { ! void *tmp_args;thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
*************** reap_child(bool wait_for_child)
*** 205,213 ****
* reused by the next created thread. Instead, the new thread
* will use the arg struct of the thread that just died.
*/
! tmp_args = thread_args[thread_num];
! thread_args[thread_num] = thread_args[parallel_jobs - 1];
! thread_args[parallel_jobs - 1] = tmp_args;
}
#endif--- 326,334 ---- * reused by the next created thread. Instead, the new thread * will use the arg struct of the thread that just died. */ ! tmp_args = cur_thread_args[thread_num]; ! cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 1]; ! cur_thread_args[parallel_jobs - 1] = tmp_args; } #endifdiff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c new file mode 100644 index 70c749d..85997e5 *** a/contrib/pg_upgrade/pg_upgrade.c --- b/contrib/pg_upgrade/pg_upgrade.c *************** main(int argc, char **argv) *** 133,139 **** if (user_opts.transfer_mode == TRANSFER_MODE_LINK) disable_old_cluster();! transfer_all_new_dbs(&old_cluster.dbarr, &new_cluster.dbarr,
old_cluster.pgdata, new_cluster.pgdata);/* --- 133,139 ---- if (user_opts.transfer_mode == TRANSFER_MODE_LINK) disable_old_cluster();! transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
old_cluster.pgdata, new_cluster.pgdata);/* diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h new file mode 100644 index c1a2f53..d5c3fa9 *** a/contrib/pg_upgrade/pg_upgrade.h --- b/contrib/pg_upgrade/pg_upgrade.h *************** typedef struct *** 134,141 **** */ typedef struct { ! char old_dir[MAXPGPATH]; ! char new_dir[MAXPGPATH];/* * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes --- 134,145 ---- */ typedef struct { ! char old_tablespace[MAXPGPATH]; ! char new_tablespace[MAXPGPATH]; ! char old_tablespace_suffix[MAXPGPATH]; ! char new_tablespace_suffix[MAXPGPATH]; ! Oid old_db_oid; ! Oid new_db_oid;/* * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes *************** typedef struct *** 276,283 **** const char *progname; /* complete pathname for this program */ char *exec_path; /* full path to my executable */ char *user; /* username for clusters */ ! char **tablespaces; /* tablespaces */ ! int num_tablespaces; char **libraries; /* loadable libraries */ int num_libraries; ClusterInfo *running_cluster; --- 280,287 ---- const char *progname; /* complete pathname for this program */ char *exec_path; /* full path to my executable */ char *user; /* username for clusters */ ! char **old_tablespaces; /* tablespaces */ ! int num_old_tablespaces; char **libraries; /* loadable libraries */ int num_libraries; ClusterInfo *running_cluster; *************** void get_sock_dir(ClusterInfo *cluster, *** 398,406 **** /* relfilenode.c */void get_pg_database_relfilenode(ClusterInfo *cluster);
! void transfer_all_new_dbs(DbInfoArr *olddb_arr,
! DbInfoArr *newdb_arr, char *old_pgdata, char *new_pgdata);
!/* tablespace.c */
--- 402,412 ---- /* relfilenode.c */void get_pg_database_relfilenode(ClusterInfo *cluster);
! void transfer_all_new_tablespaces(DbInfoArr *old_db_arr,
! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata);
! void transfer_all_new_dbs(DbInfoArr *old_db_arr,
! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata,
! char *old_tablespace);/* tablespace.c */
*************** void old_8_3_invalidate_bpchar_pattern_o
*** 464,472 ****
char *old_8_3_create_sequence_script(ClusterInfo *cluster);/* parallel.c */
! void parallel_exec_prog(const char *log_file, const char *opt_log_file,
const char *fmt,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
!
! bool reap_child(bool wait_for_child);--- 470,480 ---- char *old_8_3_create_sequence_script(ClusterInfo *cluster);/* parallel.c */
! void parallel_exec_prog(const char *log_file, const char *opt_log_file,
const char *fmt,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
! void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
! char *old_pgdata, char *new_pgdata,
! char *old_tablespace);
! bool reap_child(bool wait_for_child);diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c new file mode 100644 index 9d0d5a0..cfdd39c *** a/contrib/pg_upgrade/relfilenode.c --- b/contrib/pg_upgrade/relfilenode.c *************** *** 16,42 ****static void transfer_single_new_db(pageCnvCtx *pageConverter,
! FileNameMap *maps, int size);
static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
const char *suffix);/*
* transfer_all_new_dbs()
*
* Responsible for upgrading all database. invokes routines to generate mappings and then
* physically link the databases.
*/
void
! transfer_all_new_dbs(DbInfoArr *old_db_arr,
! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata)
{
int old_dbnum,
new_dbnum;- pg_log(PG_REPORT, "%s user relation files\n", - user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying"); - /* Scan the old cluster databases and transfer their files */ for (old_dbnum = new_dbnum = 0; old_dbnum < old_db_arr->ndbs; --- 16,81 ----static void transfer_single_new_db(pageCnvCtx *pageConverter,
! FileNameMap *maps, int size, char *old_tablespace);
static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
const char *suffix);/* + * transfer_all_new_tablespaces() + * + * Responsible for upgrading all database. invokes routines to generate mappings and then + * physically link the databases. + */ + void + transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, + char *old_pgdata, char *new_pgdata) + { + pg_log(PG_REPORT, "%s user relation files\n", + user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying"); + + /* + * Transfering files by tablespace is tricky because a single database + * can use multiple tablespaces. For non-parallel mode, we just pass a + * NULL tablespace path, which matches all tablespaces. In parallel mode, + * we pass the default tablespace and all user-created tablespaces + * and let those operations happen in parallel. + */ + if (user_opts.jobs <= 1) + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, + new_pgdata, NULL); + else + { + int tblnum; + + for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, + new_pgdata, os_info.old_tablespaces[tblnum]); + /* reap all children */ + while (reap_child(true) == true) + ; + } + + end_progress_output(); + check_ok(); + + return; + } + + + /* * transfer_all_new_dbs() * * Responsible for upgrading all database. invokes routines to generate mappings and then * physically link the databases. */ void ! transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, ! char *old_pgdata, char *new_pgdata, char *old_tablespace) { int old_dbnum, new_dbnum;/* Scan the old cluster databases and transfer their files */
for (old_dbnum = new_dbnum = 0;
old_dbnum < old_db_arr->ndbs;
*************** transfer_all_new_dbs(DbInfoArr *old_db_a
*** 75,89 ****
#ifdef PAGE_CONVERSION
pageConverter = setupPageConverter();
#endif
! transfer_single_new_db(pageConverter, mappings, n_maps);pg_free(mappings);
}
}- end_progress_output();
- check_ok();
-
return;
}--- 114,126 ---- #ifdef PAGE_CONVERSION pageConverter = setupPageConverter(); #endif ! transfer_single_new_db(pageConverter, mappings, n_maps, ! old_tablespace);pg_free(mappings);
}
}return;
}*************** get_pg_database_relfilenode(ClusterInfo *** 125,131 **** */ static void transfer_single_new_db(pageCnvCtx *pageConverter, ! FileNameMap *maps, int size) { int mapnum; bool vm_crashsafe_match = true; --- 162,168 ---- */ static void transfer_single_new_db(pageCnvCtx *pageConverter, ! FileNameMap *maps, int size, char *old_tablespace) { int mapnum; bool vm_crashsafe_match = true; *************** transfer_single_new_db(pageCnvCtx *pageC *** 140,157 ****for (mapnum = 0; mapnum < size; mapnum++) { ! /* transfer primary file */ ! transfer_relfile(pageConverter, &maps[mapnum], ""); ! ! /* fsm/vm files added in PG 8.4 */ ! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) { ! /* ! * Copy/link any fsm and vm files, if they exist ! */ ! transfer_relfile(pageConverter, &maps[mapnum], "_fsm"); ! if (vm_crashsafe_match) ! transfer_relfile(pageConverter, &maps[mapnum], "_vm"); } } } --- 177,198 ----for (mapnum = 0; mapnum < size; mapnum++)
{
! if (old_tablespace == NULL ||
! strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
! /* transfer primary file */
! transfer_relfile(pageConverter, &maps[mapnum], "");
!
! /* fsm/vm files added in PG 8.4 */
! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
! {
! /*
! * Copy/link any fsm and vm files, if they exist
! */
! transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
! if (vm_crashsafe_match)
! transfer_relfile(pageConverter, &maps[mapnum], "_vm");
! }
}
}
}
*************** transfer_relfile(pageCnvCtx *pageConvert
*** 187,196 ****
else
snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);! snprintf(old_file, sizeof(old_file), "%s/%u%s%s", map->old_dir,
! map->old_relfilenode, type_suffix, extent_suffix);
! snprintf(new_file, sizeof(new_file), "%s/%u%s%s", map->new_dir,
! map->new_relfilenode, type_suffix, extent_suffix);/* Is it an extent, fsm, or vm file? */ if (type_suffix[0] != '\0' || segno != 0) --- 228,239 ---- else snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);! snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", map->old_tablespace,
! map->old_tablespace_suffix, map->old_db_oid, map->old_relfilenode,
! type_suffix, extent_suffix);
! snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", map->new_tablespace,
! map->new_tablespace_suffix, map->new_db_oid, map->new_relfilenode,
! type_suffix, extent_suffix);/* Is it an extent, fsm, or vm file? */ if (type_suffix[0] != '\0' || segno != 0) *************** transfer_relfile(pageCnvCtx *pageConvert *** 239,241 **** --- 282,285 ----return; } + diff --git a/contrib/pg_upgrade/tablespace.c b/contrib/pg_upgrade/tablespace.c new file mode 100644 index a93c517..321738d *** a/contrib/pg_upgrade/tablespace.c --- b/contrib/pg_upgrade/tablespace.c *************** init_tablespaces(void) *** 23,29 **** set_tablespace_directory_suffix(&old_cluster); set_tablespace_directory_suffix(&new_cluster);! if (os_info.num_tablespaces > 0 && strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0) pg_log(PG_FATAL, "Cannot upgrade to/from the same system catalog version when\n" --- 23,29 ---- set_tablespace_directory_suffix(&old_cluster); set_tablespace_directory_suffix(&new_cluster);! if (os_info.num_old_tablespaces > 0 &&
strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0)
pg_log(PG_FATAL,
"Cannot upgrade to/from the same system catalog version when\n"
*************** get_tablespace_paths(void)
*** 57,72 ****res = executeQueryOrDie(conn, "%s", query);
! if ((os_info.num_tablespaces = PQntuples(res)) != 0)
! os_info.tablespaces = (char **) pg_malloc(
! os_info.num_tablespaces * sizeof(char *));
else
! os_info.tablespaces = NULL;i_spclocation = PQfnumber(res, "spclocation");
! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++)
! os_info.tablespaces[tblnum] = pg_strdup(
PQgetvalue(res, tblnum, i_spclocation));PQclear(res); --- 57,72 ----res = executeQueryOrDie(conn, "%s", query);
! if ((os_info.num_old_tablespaces = PQntuples(res)) != 0)
! os_info.old_tablespaces = (char **) pg_malloc(
! os_info.num_old_tablespaces * sizeof(char *));
else
! os_info.old_tablespaces = NULL;i_spclocation = PQfnumber(res, "spclocation");
! for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
! os_info.old_tablespaces[tblnum] = pg_strdup(
PQgetvalue(res, tblnum, i_spclocation));PQclear(res); diff --git a/doc/src/sgml/pgupgrade.sgml b/doc/src/sgml/pgupgrade.sgml new file mode 100644 index 53781e4..3e5d548 *** a/doc/src/sgml/pgupgrade.sgml --- b/doc/src/sgml/pgupgrade.sgml *************** NET STOP pgsql-8.3 (<productname>Postgr *** 342,351 ****<para>
The <option>--jobs</> option allows multiple CPU cores to be used
! to dump and reload database schemas in parallel; a good place to
! start is the number of CPU cores on the server. This option can
! dramatically reduce the time to upgrade a multi-database server
! running on a multiprocessor machine.
</para><para> --- 342,353 ----<para>
The <option>--jobs</> option allows multiple CPU cores to be used
! for file copy operations and to dump and reload database schemas in
! parallel; a good place to start is the number of CPU cores on the
! server, or the number of tablespaces if not using the
! <option>--link</> option. This option can dramatically reduce the
! time to upgrade a multi-database server running on a multiprocessor
! machine.
</para><para>
:
# Benchmark driver for pg_upgrade file copying.
#
# For each data size in CYCLES, and for each job count in JOBLIMIT (1 and 2),
# the script:
#   1. switches the source tree to $BRANCH and runs tools/setup for the
#      old and new releases,
#   2. creates two tablespaces (t1, t2) and fills each with CYCLES/2 tables,
#      every table holding a single 999000000-character value,
#   3. stops the old server, drops the kernel page cache, and
#   4. appends the elapsed wall-clock seconds of "tools/upgrade -j N"
#      to /rtmp/out.
#
# Writing to /proc/sys/vm/drop_caches normally requires root.
# traprm, pgsw, pipe, sql, pgstart, pgstop, bell and tools/* are site-local
# helpers that are not defined in this script.

# NOTE(review): presumably installs cleanup traps -- confirm against traprm.
. traprm

export QUIET=$((QUIET + 1))

# Start from an empty results file; the loops below only append to it.
: > /rtmp/out

OLDREL=9.2
NEWREL=9.3
BRANCH=tablespace
SSD="f"

export PGOPTIONS="-c synchronous_commit=off"

for CYCLES in 2 4 8 16 32 64 128
do
	# Label the following timings with the data-size step.
	echo "$CYCLES" >> /rtmp/out

	for JOBLIMIT in 1 2
	do
		# NOTE(review): REL is not assigned anywhere in this script;
		# presumably it comes from the environment -- confirm.
		# Abort rather than switch branches in the wrong directory.
		cd "/pgsql/$REL" || exit 1
		pgsw "$BRANCH"
		cd -

		tools/setup "$OLDREL" "$NEWREL"
		sleep 2

		[ "$SSD" = "f" ] && tools/mv_to_archive

		# need for +16k
		for CONFIG in /u/pgsql.old/data/postgresql.conf /u/pgsql/data/postgresql.conf
		do
			pipe sed 's/#max_locks_per_transaction = 64/max_locks_per_transaction = 64000/' "$CONFIG"
			pipe sed 's/shared_buffers = 128MB/shared_buffers = 1GB/' "$CONFIG"
		done

		pgstart -w /u/pgsql.old/data

		# Recreate both tablespace directories from scratch for every run.
		for DIR in /archive/tmp/t1 /backup0/tmp/t2
		do
			rm -rf "$DIR"
			mkdir "$DIR"
			chown postgres "$DIR"
			chmod 0700 "$DIR"
		done

		echo "CREATE TABLESPACE t1 LOCATION '/archive/tmp/t1';" | sql --echo-all test
		echo "CREATE TABLESPACE t2 LOCATION '/backup0/tmp/t2';" | sql --echo-all test

		# One SQL session per tablespace; each creates CYCLES/2 tables.
		for SPNO in $(jot 2)
		do
			for TBLNO in $(jot $((CYCLES / 2)))
			do
				echo "CREATE TABLE test${SPNO}_${TBLNO} (x TEXT) TABLESPACE t$SPNO;"
				echo "ALTER TABLE test${SPNO}_${TBLNO} ALTER COLUMN x SET STORAGE EXTERNAL;"
				echo "INSERT INTO test${SPNO}_${TBLNO} SELECT repeat('x', 999000000);"
			done |
			PGOPTIONS="-c synchronous_commit=off" sql --single-transaction --echo-all test
		done

		pgstop /u/pgsql.old/data
		sleep 2

		# clear cache
		echo 3 > /proc/sys/vm/drop_caches

		# allow system to repopulate
		sleep 15

		# Record elapsed seconds only; stop the whole benchmark if the
		# upgrade fails.
		/usr/bin/time --output=/rtmp/out --append --format '%e' tools/upgrade -j "$JOBLIMIT" || exit
		sleep 2
	done
done

bell
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers