Re: pg_basebackup vs. Windows and tablespaces
Magnus Hagander wrote:
On Mon, Aug 5, 2013 at 10:03 PM, Noah Misch <noah(at)leadboat(dot)com>
wrote:
On Thu, Aug 01, 2013 at 01:04:42PM -0400, Andrew Dunstan wrote:
On 08/01/2013 12:15 PM, Noah Misch wrote:
1. Include in the base backup a file listing symbolic links/junction
points,
then have archive recovery recreate them. This file would be managed
like the
backup label file; exclusive backups would actually write it to the
master
data directory, and non-exclusive backups would incorporate it on the
fly.
pg_basebackup could also omit the actual links from its backup.
Nearly any
tar or file copy utility would then suffice.
I like #1, it seems nice and workable.
Agreed. I'll lean in that direction for resolving the proximate problem.
+1.
I have implemented the above feature, which will help to
restore symlinks during archive recovery.
Implementation details:
-----------------------------------
1. This feature is implemented only for tar format on Windows,
as native Windows utilities are not able to create symlinks while
extracting files from tar (It might be possible to create symlinks
if cygwin is installed on your system, however I feel we need this
feature to work for native windows as well). Another reason to not
create it for non-tar (plain) format is that plain format can update the
symlinks via -T option and backing up symlink file during that
operation can lead to spurious symlinks after archive recovery.
2. Symlink file format:
<oid> <linkpath>
16387 E:\PostgreSQL\tbs
The symlink file will contain entries for all the tablespaces
under the pg_tblspc directory. I have kept the file name as
symlink_label (suggestions are welcome if you want a
different name for this file).
3. While taking exclusive backup, write the symlink file
in master data directory similar to backup_label file.
4. Non-exclusive backups include the symlink file in archive.
5. Archive recovery will create symlinks if the symlink_label file
is present and contains information about symlinks; it will rename
the file to symlink_label.old once it is done using the file.
6. Cancelling a backup will rename the file symlink_label to
symlink_label.old to prevent the server from trying to create symlinks
during archive recovery.
Feedback?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v1.patch (application/octet-stream; name=extend_basebackup_to_include_symlink_v1.patch) [Download]
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9f417de..eef8854 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -40,6 +40,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -802,6 +803,9 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+#ifdef WIN32
+static bool read_symlink_label(List **tablespaces);
+#endif
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5948,6 +5952,9 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+#ifdef WIN32
+ bool haveSymlinkLabel = false;
+#endif
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6022,16 +6029,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6100,6 +6097,9 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+#ifdef WIN32
+ List *tablespaces = NIL;
+#endif
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6144,6 +6144,61 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+#ifdef WIN32
+ /* read the symlink file if present and create symlinks */
+ if (read_symlink_label(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities does that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveSymlinkLabel = true;
+ }
+#endif
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6218,6 +6273,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files needs to be removed from all
+ * directories including pg_tblspc, however for windows the symlinks are
+ * created only after reading symlink file in case of archive recovery
+ * from backup, so needs to clear old relcache files here after creating
+ * symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6470,6 +6539,25 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+#ifdef WIN32
+ /*
+ * If there was a symlink label file, it's done its job and the
+ * symlinks have been created. We must get rid of the label file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveSymlinkLabel)
+ {
+ unlink(SYMLINK_LABEL_OLD);
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9607,16 +9695,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and symlink label files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/symlink_label, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup, symlink label files are not written to disk. Instead,
+ * there would-be contents are returned in *labelfile and *symlinkfile, and
+ * the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'symlink_label'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * symlinkfile is required only for tar format in windows as native windows
+ * utilites are not able create symlinks while extracting files from tar.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9629,7 +9722,8 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **symlinkfile, bool infotbssize)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9643,6 +9737,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData symlinkfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9713,6 +9808,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9835,6 +9933,78 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(linkpath);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&symlinkfbuf, "%s %s\n", ti->oid, ti->path);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9897,9 +10067,55 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+#ifdef WIN32
+ /* Write backup symlink file. */
+ if (symlinkfbuf.len > 0)
+ {
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ SYMLINK_LABEL_FILE)));
+
+
+ fp = AllocateFile(SYMLINK_LABEL_FILE, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ if (fwrite(symlinkfbuf.data, symlinkfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+#endif
+ pfree(symlinkfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+#ifdef WIN32
+ if (symlinkfbuf.len > 0)
+ *symlinkfile = symlinkfbuf.data;
+#endif
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10071,6 +10287,14 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+#ifdef WIN32
+ /*
+ * Remove symlink file if present, symlink file is created
+ * only if there are tablespaces.
+ */
+ unlink(SYMLINK_LABEL_FILE);
+#endif
}
/*
@@ -10481,6 +10705,74 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
return true;
}
+#ifdef WIN32
+/*
+ * read_symlink_label: check to see if a symlink_label file is present
+ *
+ * If we see a symlink_label during recovery, we assume that we are recovering
+ * from a backup dump file, and we therefore need to create symlinks as per
+ * the information present in symlink file.
+ *
+ * Returns TRUE if a symlink_label was found (and fills the link information
+ * for all the tablespace links present in file); returns FALSE if not.
+ */
+static bool
+read_symlink_label(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char ch;
+ char tbsoid[MAXPGPATH];
+ char tbslinkpath[MAXPGPATH];
+
+ /*
+ * See if symlink file is present
+ */
+ lfp = AllocateFile(SYMLINK_LABEL_FILE, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from symlink file (this code
+ * is pretty crude, but we are not expecting any variability in the file
+ * format).
+ */
+ while (!feof(lfp))
+ {
+ ti = palloc(sizeof(tablespaceinfo));
+ if (fscanf(lfp, "%s %s%c", tbsoid, tbslinkpath, &ch) != 3 || ch != '\n')
+ {
+ if (feof(lfp))
+ break;
+ else
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", SYMLINK_LABEL_FILE)));
+ }
+
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+
+ return true;
+}
+#endif
+
/*
* Error context callback for errors occurring during rm_redo().
*/
@@ -10514,11 +10806,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"symlink_label" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "symlink_label" file exists, it will be renamed to
+ * "symlink_label.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10547,6 +10842,31 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+#ifdef WIN32
+ /* if the file is not there, return */
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(SYMLINK_LABEL_OLD);
+
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 8a87581..f66ade1 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..8925148 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendsymlinkfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendsymlinkinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *symlinkfile = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &symlinkfile, opt->progress);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +138,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +155,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +206,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send symlink file if required and then the bulk of the
+ * files
+ */
+ if (symlinkfile && opt->sendsymlinkfile)
+ {
+ sendFileWithContent(SYMLINK_LABEL_FILE, symlinkfile);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +496,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_sendsymlinkfile = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +567,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * symlinkfile is required only for tar format in windows as
+ * native windows utilites are not able create symlinks while
+ * extracting files from tar.
+ */
+ if (o_sendsymlinkfile)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendsymlinkfile = true;
+ o_sendsymlinkfile = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +809,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +843,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +855,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendsymlinkinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * symlink_label file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendsymlinkinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +890,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or symlink file, it belongs to a backup
+ * started by the user with pg_start_backup(). It is *not* correct for
+ * this backup, our backup_label/symlink is injected into the tar
+ * separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, SYMLINK_LABEL_FILE) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1026,6 +991,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
/* Allow symbolic links in pg_tblspc only */
if (strcmp(path, "./pg_tblspc") == 0 &&
+ sendsymlinkinfo &&
#ifndef WIN32
S_ISLNK(statbuf.st_mode)
#else
@@ -1100,7 +1066,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
}
}
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, true);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index b119fc0..4786e68 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1663,13 +1663,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 85f9cb7..f18e5ea 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -15,7 +15,9 @@
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
#include "storage/buf.h"
+#include "storage/fd.h"
#include "utils/pg_crc.h"
/*
@@ -344,7 +346,8 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **symlinkfile, bool infotbssize);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -353,4 +356,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define SYMLINK_LABEL_FILE "symlink_label"
+#define SYMLINK_LABEL_OLD "symlink_label.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
15 July 2014 19:29 Amit Kapila Wrote,
Implementation details:
-----------------------------------
1. This feature is implemented only for tar format on Windows,
as native Windows utilities are not able to create symlinks while
extracting files from tar (It might be possible to create symlinks
if cygwin is installed on your system, however I feel we need this
feature to work for native windows as well). Another reason to not
create it for non-tar (plain) format is that plain format can update the
symlinks via -T option and backing up symlink file during that
operation can lead to spurious symlinks after archive recovery.
I have reviewed the patch and did not find any major issues.
There are some comments I would like to share with you:
1. Rebase the patch to current GIT head.
2. + * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
I think the declaration and initialization of the symlinkfbuf string can be moved under the #ifdef WIN32 compile-time macro;
on other platforms it is simply allocated and freed, which can be avoided.
3. + /*
+ * native windows utilites are not able create symlinks while
+ * extracting files from tar.
+ */
Rephrase the above sentence and fix spelling mistake (utilities are not able to create)
I haven’t done the testing yet; once I finish testing, I will share the results with you.
Regards,
Dilip
On Wed, Aug 20, 2014 at 12:12 PM, Dilip kumar <dilip.kumar@huawei.com>
wrote:
I have reviewed the patch and did not find any major comments.
Thanks for the review.
There are some comments I would like to share with you
1. Rebase the patch to current GIT head.
Done.
2. + * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
I think declaration and initialization of symlinkfbuf string
can be moved under #ifdef WIN32 compile time macro,
for other platform it’s simply allocated and freed which can be avoided.
Agreed, I have changed the patch as per your suggestion.
3. + /*
+ * native windows utilites are not able
create symlinks while
+ * extracting files from tar.
+ */
Rephrase the above sentence and fix spelling mistake
(utilities are not able to create)
Done.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v2.patch (application/octet-stream; name=extend_basebackup_to_include_symlink_v2.patch) [Download]
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 34f2fc0..90f17e9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -40,6 +40,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -802,6 +803,9 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+#ifdef WIN32
+static bool read_symlink_label(List **tablespaces);
+#endif
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -6004,6 +6008,9 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+#ifdef WIN32
+ bool haveSymlinkLabel = false;
+#endif
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6078,16 +6085,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6156,6 +6153,9 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+#ifdef WIN32
+ List *tablespaces = NIL;
+#endif
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6200,6 +6200,61 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+#ifdef WIN32
+ /* read the symlink file if present and create symlinks */
+ if (read_symlink_label(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities does that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveSymlinkLabel = true;
+ }
+#endif
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6274,6 +6329,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc; however, on Windows the symlinks are
+ * created only after reading the symlink file during archive recovery
+ * from a backup, so we need to clear old relcache files here, after
+ * creating the symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6526,6 +6595,25 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+#ifdef WIN32
+ /*
+ * If there was a symlink label file, it's done its job and the
+ * symlinks have been created. We must get rid of the label file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveSymlinkLabel)
+ {
+ unlink(SYMLINK_LABEL_OLD);
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9780,16 +9868,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and symlink label files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/symlink_label, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup and symlink label files are not written to disk. Instead,
+ * their would-be contents are returned in *labelfile and *symlinkfile, and
+ * the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'symlink_label'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * symlinkfile is required only for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9802,7 +9895,8 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **symlinkfile, bool infotbssize)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9816,6 +9910,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+#ifdef WIN32
+ StringInfoData symlinkfbuf;
+#endif
backup_started_in_recovery = RecoveryInProgress();
@@ -9886,6 +9983,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -10007,6 +10107,82 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLByteToSeg(startpoint, _logSegNo);
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
+#ifdef WIN32
+ /*
+ * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
+#endif
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(linkpath);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+#ifdef WIN32
+ appendStringInfo(&symlinkfbuf, "%s %s\n", ti->oid, ti->path);
+#endif
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
/*
* Construct backup label file
*/
@@ -10070,9 +10246,55 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+#ifdef WIN32
+ /* Write backup symlink file. */
+ if (symlinkfbuf.len > 0)
+ {
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ SYMLINK_LABEL_FILE)));
+
+ fp = AllocateFile(SYMLINK_LABEL_FILE, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ if (fwrite(symlinkfbuf.data, symlinkfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+
+ pfree(symlinkfbuf.data);
+#endif
}
else
+ {
*labelfile = labelfbuf.data;
+#ifdef WIN32
+ if (symlinkfbuf.len > 0)
+ *symlinkfile = symlinkfbuf.data;
+#endif
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10244,6 +10466,14 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+#ifdef WIN32
+ /*
+ * Remove symlink file if present, symlink file is created
+ * only if there are tablespaces.
+ */
+ unlink(SYMLINK_LABEL_FILE);
+#endif
}
/*
@@ -10654,6 +10884,74 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
return true;
}
+#ifdef WIN32
+/*
+ * read_symlink_label: check to see if a symlink_label file is present
+ *
+ * If we see a symlink_label during recovery, we assume that we are recovering
+ * from a backup dump file, and we therefore need to create symlinks as per
+ * the information present in symlink file.
+ *
+ * Returns TRUE if a symlink_label was found (and fills the link information
+ * for all the tablespace links present in file); returns FALSE if not.
+ */
+static bool
+read_symlink_label(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char ch;
+ char tbsoid[MAXPGPATH];
+ char tbslinkpath[MAXPGPATH];
+
+ /*
+ * See if symlink file is present
+ */
+ lfp = AllocateFile(SYMLINK_LABEL_FILE, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from symlink file (this code
+ * is pretty crude, but we are not expecting any variability in the file
+ * format).
+ */
+ while (!feof(lfp))
+ {
+ ti = palloc(sizeof(tablespaceinfo));
+ if (fscanf(lfp, "%s %s%c", tbsoid, tbslinkpath, &ch) != 3 || ch != '\n')
+ {
+ if (feof(lfp))
+ break;
+ else
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", SYMLINK_LABEL_FILE)));
+ }
+
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+
+ return true;
+}
+#endif
+
/*
* Error context callback for errors occurring during rm_redo().
*/
@@ -10687,11 +10985,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"symlink_label" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "symlink_label" file exists, it will be renamed to
+ * "symlink_label.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10720,6 +11021,31 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+#ifdef WIN32
+ /* if the file is not there, return */
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(SYMLINK_LABEL_OLD);
+
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index f186468..33a1ab2 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..8925148 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendsymlinkfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendsymlinkinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *symlinkfile = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &symlinkfile, opt->progress);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +138,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +155,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +206,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send symlink file if required and then the bulk of the
+ * files
+ */
+ if (symlinkfile && opt->sendsymlinkfile)
+ {
+ sendFileWithContent(SYMLINK_LABEL_FILE, symlinkfile);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +496,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_sendsymlinkfile = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +567,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * symlinkfile is required only for tar format in windows as
+ * native windows utilities are not able to create symlinks while
+ * extracting files from tar.
+ */
+ if (o_sendsymlinkfile)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendsymlinkfile = true;
+ o_sendsymlinkfile = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +809,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +843,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +855,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendsymlinkinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * symlink_label file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendsymlinkinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +890,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or symlink file, it belongs to a backup
+ * started by the user with pg_start_backup(). It is *not* correct for
+ * this backup, our backup_label/symlink is injected into the tar
+ * separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, SYMLINK_LABEL_FILE) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1026,6 +991,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
/* Allow symbolic links in pg_tblspc only */
if (strcmp(path, "./pg_tblspc") == 0 &&
+ sendsymlinkinfo &&
#ifndef WIN32
S_ISLNK(statbuf.st_mode)
#else
@@ -1100,7 +1066,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
}
}
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, true);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 3d26e22..6f3a024 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1664,13 +1664,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 7d6db49..5c70812 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -15,8 +15,10 @@
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
#include "storage/block.h"
#include "storage/buf.h"
+#include "storage/fd.h"
#include "storage/relfilenode.h"
#include "utils/pg_crc.h"
@@ -349,7 +351,8 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **symlinkfile, bool infotbssize);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -358,4 +361,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define SYMLINK_LABEL_FILE "symlink_label"
+#define SYMLINK_LABEL_OLD "symlink_label.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On 20 August 2014 19:49, Amit Kapila Wrote
There are some comments I would like to share with you
1. Rebase the patch to current GIT head.
Done.
+ initStringInfo(&symlinkfbuf);
I think declaration and initialization of symlinkfbuf string can be moved under #ifdef WIN32 compile time macro,
for other platform it’s simply allocated and freed which can be avoided.
Agreed, I have changed the patch as per your suggestion.
I have done the testing and behavior is as per expectation,
Do we need to do some document change? I mean is this limitation on windows is mentioned anywhere ?
If no change is needed, then I will move the patch to “Ready For Committer”.
Thanks & Regards,
Dilip
On Thu, Sep 11, 2014 at 9:10 AM, Dilip kumar <dilip.kumar@huawei.com> wrote:
I have done the testing and behavior is as per expectation,
Do we need to do some document change? I mean is this limitation on
windows is mentioned anywhere ?
I don't think currently such a limitation is mentioned in docs,
however I think we can update the docs at below locations:
1. In description of pg_start_backup in below page:
http://www.postgresql.org/docs/devel/static/functions-admin.html#FUNCTIONS-ADMIN-BACKUP
2. In Explanation of Base Backup, basically under heading
Making a Base Backup Using the Low Level API at below
page:
http://www.postgresql.org/docs/devel/static/continuous-archiving.html#BACKUP-BASE-BACKUP
In general, we can explain about symlink_label file where ever
we are explaining about backup_label file.
If you think it is sufficient to explain about symlink_label in
above places, then I can update the patch.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On 11 September 2014 10:21, Amit kapila Wrote,
I don't think currently such a limitation is mentioned in docs,
however I think we can update the docs at below locations:
1. In description of pg_start_backup in below page:
http://www.postgresql.org/docs/devel/static/functions-admin.html#FUNCTIONS-ADMIN-BACKUP
2. In Explanation of Base Backup, basically under heading
Making a Base Backup Using the Low Level API at below
page:
http://www.postgresql.org/docs/devel/static/continuous-archiving.html#BACKUP-BASE-BACKUP
In general, we can explain about symlink_label file where ever
we are explaining about backup_label file.
If you think it is sufficient to explain about symlink_label in
above places, then I can update the patch.
I think this will be sufficient….
Regards,
Dilip
On Fri, Sep 12, 2014 at 1:50 PM, Dilip kumar <dilip.kumar@huawei.com> wrote:
On 11 September 2014 10:21, Amit kapila Wrote,
I don't think currently such a limitation is mentioned in docs,
however I think we can update the docs at below locations:
1. In description of pg_start_backup in below page:
http://www.postgresql.org/docs/devel/static/functions-admin.html#FUNCTIONS-ADMIN-BACKUP
2. In Explanation of Base Backup, basically under heading
Making a Base Backup Using the Low Level API at below
page:
http://www.postgresql.org/docs/devel/static/continuous-archiving.html#BACKUP-BASE-BACKUP
In general, we can explain about symlink_label file where ever
we are explaining about backup_label file.
If you think it is sufficient to explain about symlink_label in
above places, then I can update the patch.
I think this will be sufficient….
Please find updated patch to include those documentation changes.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v3.patchapplication/octet-stream; name=extend_basebackup_to_include_symlink_v3.patchDownload
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 07ca0dc..8c15d62 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -834,8 +834,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. On Windows, this function also creates a
+ <firstterm>symlink label</> file, called <filename>symlink_label</>,
+ in the cluster directory with information about symbolic links in
+ <filename>pg_tblspc/</>. Both the files are critical to the integrity of
+ the backup, should you need to restore from it.
</para>
<para>
@@ -963,17 +966,19 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes a file named <filename>backup_label</> and on Windows another
+ file named <filename>symlink_label</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The symlink label file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link. These files
+ are not merely for your information; their presence and contents are critical
+ to the proper operation of the system's recovery process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7195df8..b1ecce6 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16265,7 +16265,8 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
+ writes a backup label file (<filename>backup_label</>) and, on Windows,
+ a symlink label file (<filename>symlink_label</>) into the
database cluster's data directory, performs a checkpoint,
and then returns the backup's starting transaction log location as text.
The user can ignore this result value, but it is
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 34f2fc0..90f17e9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -40,6 +40,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -802,6 +803,9 @@ static void xlog_outrec(StringInfo buf, XLogRecord *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+#ifdef WIN32
+static bool read_symlink_label(List **tablespaces);
+#endif
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -6004,6 +6008,9 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+#ifdef WIN32
+ bool haveSymlinkLabel = false;
+#endif
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6078,16 +6085,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6156,6 +6153,9 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+#ifdef WIN32
+ List *tablespaces = NIL;
+#endif
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6200,6 +6200,61 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+#ifdef WIN32
+ /* read the symlink file if present and create symlinks */
+ if (read_symlink_label(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities does that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveSymlinkLabel = true;
+ }
+#endif
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6274,6 +6329,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files needs to be removed from all
+ * directories including pg_tblspc, however for windows the symlinks are
+ * created only after reading symlink file in case of archive recovery
+ * from backup, so needs to clear old relcache files here after creating
+ * symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6526,6 +6595,25 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+#ifdef WIN32
+ /*
+ * If there was a symlink label file, it's done its job and the
+ * symlinks have been created. We must get rid of the label file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveSymlinkLabel)
+ {
+ unlink(SYMLINK_LABEL_OLD);
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9780,16 +9868,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and symlink label files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/symlink_label, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup and symlink label files are not written to disk. Instead,
+ * their would-be contents are returned in *labelfile and *symlinkfile, and
+ * the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'symlink_label'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * symlinkfile is required only for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9802,7 +9895,8 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **symlinkfile, bool infotbssize)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9816,6 +9910,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+#ifdef WIN32
+ StringInfoData symlinkfbuf;
+#endif
backup_started_in_recovery = RecoveryInProgress();
@@ -9886,6 +9983,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -10007,6 +10107,82 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLByteToSeg(startpoint, _logSegNo);
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
+#ifdef WIN32
+ /*
+ * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
+#endif
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(linkpath);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+#ifdef WIN32
+ appendStringInfo(&symlinkfbuf, "%s %s\n", ti->oid, ti->path);
+#endif
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
/*
* Construct backup label file
*/
@@ -10070,9 +10246,55 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+#ifdef WIN32
+ /* Write backup symlink file. */
+ if (symlinkfbuf.len > 0)
+ {
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ SYMLINK_LABEL_FILE)));
+
+ fp = AllocateFile(SYMLINK_LABEL_FILE, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ if (fwrite(symlinkfbuf.data, symlinkfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+
+ pfree(symlinkfbuf.data);
+#endif
}
else
+ {
*labelfile = labelfbuf.data;
+#ifdef WIN32
+ if (symlinkfbuf.len > 0)
+ *symlinkfile = symlinkfbuf.data;
+#endif
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10244,6 +10466,14 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+#ifdef WIN32
+ /*
+ * Remove symlink file if present, symlink file is created
+ * only if there are tablespaces.
+ */
+ unlink(SYMLINK_LABEL_FILE);
+#endif
}
/*
@@ -10654,6 +10884,74 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
return true;
}
+#ifdef WIN32
+/*
+ * read_symlink_label: check to see if a symlink_label file is present
+ *
+ * If we see a symlink_label during recovery, we assume that we are recovering
+ * from a backup dump file, and we therefore need to create symlinks as per
+ * the information present in symlink file.
+ *
+ * Returns TRUE if a symlink_label was found (and fills the link information
+ * for all the tablespace links present in file); returns FALSE if not.
+ */
+static bool
+read_symlink_label(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char ch;
+ char tbsoid[MAXPGPATH];
+ char tbslinkpath[MAXPGPATH];
+
+ /*
+ * See if symlink file is present
+ */
+ lfp = AllocateFile(SYMLINK_LABEL_FILE, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from symlink file (this code
+ * is pretty crude, but we are not expecting any variability in the file
+ * format).
+ */
+ while (!feof(lfp))
+ {
+ ti = palloc(sizeof(tablespaceinfo));
+ if (fscanf(lfp, "%s %s%c", tbsoid, tbslinkpath, &ch) != 3 || ch != '\n')
+ {
+ if (feof(lfp))
+ break;
+ else
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", SYMLINK_LABEL_FILE)));
+ }
+
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+
+ return true;
+}
+#endif
+
/*
* Error context callback for errors occurring during rm_redo().
*/
@@ -10687,11 +10985,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"symlink_label" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "symlink_label" file exists, it will be renamed to
+ * "symlink_label.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10720,6 +11021,31 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+#ifdef WIN32
+ /* if the file is not there, return */
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(SYMLINK_LABEL_OLD);
+
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 133143d..d8864d9 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..8925148 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendsymlinkfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendsymlinkinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *symlinkfile = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &symlinkfile, opt->progress);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +138,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +155,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +206,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send symlink file if required and then the bulk of the
+ * files
+ */
+ if (symlinkfile && opt->sendsymlinkfile)
+ {
+ sendFileWithContent(SYMLINK_LABEL_FILE, symlinkfile);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +496,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_sendsymlinkfile = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +567,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * symlinkfile is required only for tar format in windows as
+ * native windows utilites are not able create symlinks while
+ * extracting files from tar.
+ */
+ if (o_sendsymlinkfile)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendsymlinkfile = true;
+ o_sendsymlinkfile = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +809,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +843,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +855,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendsymlinkinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * symlink_label file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendsymlinkinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +890,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or symlink file, it belongs to a backup
+ * started by the user with pg_start_backup(). It is *not* correct for
+ * this backup, our backup_label/symlink is injected into the tar
+ * separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, SYMLINK_LABEL_FILE) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1026,6 +991,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
/* Allow symbolic links in pg_tblspc only */
if (strcmp(path, "./pg_tblspc") == 0 &&
+ sendsymlinkinfo &&
#ifndef WIN32
S_ISLNK(statbuf.st_mode)
#else
@@ -1100,7 +1066,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
}
}
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, true);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 8b9acea..25d7491 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1651,13 +1651,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 7d6db49..5c70812 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -15,8 +15,10 @@
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
#include "storage/block.h"
#include "storage/buf.h"
+#include "storage/fd.h"
#include "storage/relfilenode.h"
#include "utils/pg_crc.h"
@@ -349,7 +351,8 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **symlinkfile, bool infotbssize);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -358,4 +361,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define SYMLINK_LABEL_FILE "symlink_label"
+#define SYMLINK_LABEL_OLD "symlink_label.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On 12 September 2014 14:34, Amit Kapila Wrote
Please find updated patch to include those documentation changes.
Looks fine, Moved to Ready for committer.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com<http://www.enterprisedb.com/>
On Fri, Sep 12, 2014 at 5:07 PM, Dilip kumar <dilip.kumar@huawei.com> wrote:
On 12 September 2014 14:34, Amit Kapila Wrote
Please find updated patch to include those documentation changes.
Looks fine, Moved to Ready for committer.
Thanks a lot for the review.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Fri, Sep 12, 2014 at 6:12 PM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Fri, Sep 12, 2014 at 5:07 PM, Dilip kumar <dilip.kumar@huawei.com>
wrote:
On 12 September 2014 14:34, Amit Kapila Wrote
Please find updated patch to include those documentation changes.
Looks fine, Moved to Ready for committer.
Thanks a lot for the review.
This patch is in "Ready for committer" stage for more than 1.5 months.
I believe this is an important functionality such that without this tar
format of pg_basebackup is not usable on Windows. I feel this
will add a value to pg_basebackup utility and moreover the need
and design has been agreed upon the list before development.
Can any Committer please have a look at this patch?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Amit Kapila wrote:
This patch is in "Ready for committer" stage for more than 1.5 months.
I believe this is an important functionality such that without this tar
format of pg_basebackup is not usable on Windows. I feel this
will add a value to pg_basebackup utility and moreover the need
and design has been agreed upon the list before development.
Can any Committer please have a look at this patch?
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11/13/14 11:52 AM, Alvaro Herrera wrote:
Amit Kapila wrote:
This patch is in "Ready for committer" stage for more than 1.5 months.
I believe this is an important functionality such that without this tar
format of pg_basebackup is not usable on Windows. I feel this
will add a value to pg_basebackup utility and moreover the need
and design has been agreed upon the list before development.
Can any Committer please have a look at this patch?
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
I believe so.
The commit only applies to "plain" output. Amit's complaint is that tar
utilities on Windows don't unpack symlinks, so the "tar" format isn't
useful on Windows when tablespaces are used. So he wants the recovery
mechanism to restore the symlinks.
I'm not fully on board with that premise. (Get a better tar tool.
Submit a patch.)
But this also ties in with the recent discovery that the tar format
cannot handle symlinks longer than 99 bytes. So this patch could also
fix that problem by putting the untruncated name of the symlink in the
WAL data.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Nov 13, 2014 at 4:33 PM, Peter Eisentraut <peter_e@gmx.net> wrote:
I'm not fully on board with that premise. (Get a better tar tool.
Submit a patch.)
Noah was unable to find one that works:
/messages/by-id/20130801161519.GA334956@tornado.leadboat.com
If most tar tools worked, and there was one that didn't, I think
that'd be a reasonable argument. But telling people to get a better
tool when they'd have to write it first seems rather unfriendly.
But this also ties in with the recent discovery that the tar format
cannot handle symlinks longer than 99 bytes. So this patch could also
fix that problem by putting the untruncated name of the symlink in the
WAL data.
Yeah, seems like a chance to kill two birds with one stone.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Nov 14, 2014 at 3:03 AM, Peter Eisentraut <peter_e@gmx.net> wrote:
On 11/13/14 11:52 AM, Alvaro Herrera wrote:
Amit Kapila wrote:
This patch is in "Ready for committer" stage for more than 1.5 months.
I believe this is an important functionality such that without this tar
format of pg_basebackup is not usable on Windows. I feel this
will add a value to pg_basebackup utility and moreover the need
and design has been agreed upon the list before development.
Can any Committer please have a look at this patch?
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
I believe so.
The commit only applies to "plain" output. Amit's complaint is that tar
utilities on Windows don't unpack symlinks, so the "tar" format isn't
useful on Windows when tablespaces are used. So he wants the recovery
mechanism to restore the symlinks.
I'm not fully on board with that premise. (Get a better tar tool.
Submit a patch.)
For native Windows environment, I have checked all the tools I could find
(Winrar, tar, 7-zip, etc...) and none of them is working and even checked
a lot on google to try to find some workaround for this, but it seems there
is no way to reliably handle this issue. Refer link :
http://sourceforge.net/p/mingw/bugs/2002/
Then I started discussion in tar community to see if they can suggest
some way, but there also I could not find a reliable solution except that
it might work in some cases if cygwin is installed. You can refer below
thread:
https://lists.gnu.org/archive/html/bug-tar/2014-07/msg00007.html
After spending good amount of time for finding a workaround or alternative,
only I decided that it is important to write this patch to make tar format
for pg_basebackup usable for Windows users.
But this also ties in with the recent discovery that the tar format
cannot handle symlinks longer than 99 bytes. So this patch could also
fix that problem by putting the untruncated name of the symlink in the
WAL data.
I have mentioned that this can be usable for Linux users as well on that
thread, however I think we might want to provide it with an option for
linux users. In general, I think it is good to have this patch for Windows
users and later if we find that Linux users can also get the benefit with
this functionality, we can expose the same with an additional option.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Thu, Nov 13, 2014 at 10:37 PM, Amit Kapila <amit.kapila16@gmail.com> wrote:
On Fri, Nov 14, 2014 at 3:03 AM, Peter Eisentraut <peter_e@gmx.net> wrote:
On 11/13/14 11:52 AM, Alvaro Herrera wrote:
Amit Kapila wrote:
This patch is in "Ready for committer" stage for more than 1.5 months.
I believe this is an important functionality such that without this tar
format of pg_basebackup is not usable on Windows. I feel this
will add a value to pg_basebackup utility and moreover the need
and design has been agreed upon the list before development.
Can any Committer please have a look at this patch?
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
I believe so.
The commit only applies to "plain" output. Amit's complaint is that tar
utilities on Windows don't unpack symlinks, so the "tar" format isn't
useful on Windows when tablespaces are used. So he wants the recovery
mechanism to restore the symlinks.
I'm not fully on board with that premise. (Get a better tar tool.
Submit a patch.)
For native Windows environment, I have checked all the tools I could find
(Winrar, tar, 7-zip, etc...) and none of them is working and even checked
a lot on google to try to find some workaround for this, but it seems there
is no way to reliably handle this issue. Refer link :
http://sourceforge.net/p/mingw/bugs/2002/
Then I started discussion in tar community to see if they can suggest
some way, but there also I could not find a reliable solution except that
it might work in some cases if cygwin is installed. You can refer below
thread:
https://lists.gnu.org/archive/html/bug-tar/2014-07/msg00007.html
After spending good amount of time for finding a workaround or alternative,
only I decided that it is important to write this patch to make tar format
for pg_basebackup usable for Windows users.
But this also ties in with the recent discovery that the tar format
cannot handle symlinks longer than 99 bytes. So this patch could also
fix that problem by putting the untruncated name of the symlink in the
WAL data.
I have mentioned that this can be usable for Linux users as well on that
thread, however I think we might want to provide it with an option for
linux users. In general, I think it is good to have this patch for Windows
users and later if we find that Linux users can also get the benefit with
this functionality, we can expose the same with an additional option.
Why make it an option instead of just always doing it this way?
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Nov 14, 2014 at 9:11 AM, Robert Haas <robertmhaas@gmail.com> wrote:
On Thu, Nov 13, 2014 at 10:37 PM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
I have mentioned that this can be usable for Linux users as well on that
thread, however I think we might want to provide it with an option for
linux users. In general, I think it is good to have this patch for
Windows
users and later if we find that Linux users can also get the benefit
with
this functionality, we can expose the same with an additional option.
Why make it an option instead of just always doing it this way?
To avoid extra work during archive recovery if it is not required. I
understand that this might not create any measurable difference, but
still there is additional I/O involved (read from file) which can be avoided.
OTOH, if that is okay, then I think we can avoid few #ifdef WIN32 that
this patch introduces and can have consistency for this operation on
both linux and Windows.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Fri, Nov 14, 2014 at 2:55 AM, Amit Kapila <amit.kapila16@gmail.com> wrote:
On Fri, Nov 14, 2014 at 9:11 AM, Robert Haas <robertmhaas@gmail.com> wrote:
On Thu, Nov 13, 2014 at 10:37 PM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
I have mentioned that this can be usable for Linux users as well on that
thread, however I think we might want to provide it with an option for
linux users. In general, I think it is good to have this patch for
Windows
users and later if we find that Linux users can also get the benefit
with
this functionality, we can expose the same with an additional option.
Why make it an option instead of just always doing it this way?
To avoid extra work during archive recovery if it is not required. I
understand that this might not create any measurable difference, but
still there is additional I/O involved (read from file) which can be avoided.
Yeah, but it's trivial. We're not going to create enough tablespaces on
one cluster for the cost of dropping a few extra symlinks in place to
matter.
OTOH, if that is okay, then I think we can avoid few #ifdef WIN32 that
this patch introduces and can have consistency for this operation on
both linux and Windows.
Having one code path for everything seems appealing to me, but what do
others think?
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Robert Haas <robertmhaas@gmail.com> writes:
On Fri, Nov 14, 2014 at 2:55 AM, Amit Kapila <amit.kapila16@gmail.com> wrote:
OTOH, if that is okay, then I think we can avoid few #ifdef WIN32 that
this patch introduces and can have consistency for this operation on
both linux and Windows.
Having one code path for everything seems appealing to me, but what do
others think?
Generally I'd be in favor of avoiding platform-dependent code where
possible, but that doesn't represent a YES vote for this particular
patch. It looks pretty messy in a quick look, even granting that the
#ifdef WIN32's would all go away.
A larger question here is about forward/backward compatibility of the
basebackup files. Changing the representation of symlinks like this
would break that. Maybe we don't care, not sure (is there already a
catversion check for these things?). Changing the file format for only
some platforms seems like definitely a bad idea though.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Nov 14, 2014 at 1:15 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Generally I'd be in favor of avoiding platform-dependent code where
possible, but that doesn't represent a YES vote for this particular
patch. It looks pretty messy in a quick look, even granting that the
#ifdef WIN32's would all go away.
Hmm, OK. I have not read the patch. Hopefully that's something that
could be fixed.
A larger question here is about forward/backward compatibility of the
basebackup files. Changing the representation of symlinks like this
would break that. Maybe we don't care, not sure (is there already a
catversion check for these things?). Changing the file format for only
some platforms seems like definitely a bad idea though.
What are the practical consequences of changing the file format? I
think that an old backup containing symlinks could be made to work on
a new server that knows how to create them, and we should probably
design it that way, but a physical backup isn't compatible across
major versions anyway, so it doesn't have the same kinds of
repercussions as changing something like the pg_dump file format.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Amit Kapila wrote:
2. Symlink file format:
<oid> <linkpath>
16387 E:\PostgreSQL\tbs
Symlink file will contain entries for all the tablespaces
under pg_tblspc directory. I have kept the file name as
symlink_label (suggestion are welcome if you want some
different name for this file).
I think symlink_label isn't a very good name. This file is not a label
in the sense that backup_label is; it seems more a "catalog" to me. And
it's not, in essence, about symlinks either, but rather about
tablespaces. I would name it following the term "tablespace catalog" or
some variation thereof.
I know we don't expect that users would have to look at the file or edit
it in normal cases, but it seems better to make it be human-readable. I
would think that the file needs to have tablespace names too, then, not
just OIDs. Maybe we don't use the tablespace name for anything other
than "documentation" purposes if someone decides to look at the file, so
perhaps it should look like a comment:
<oid> <link path> ; <tablespace name>
We already do this in pg_restore -l output IIRC.
One use case mentioned upthread is having the clone be created in the
same machine as the source server. Does your proposal help with it?
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11/13/14 4:33 PM, Peter Eisentraut wrote:
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
I believe so.
The commit only applies to "plain" output. Amit's complaint is that tar
utilities on Windows don't unpack symlinks, so the "tar" format isn't
useful on Windows when tablespaces are used. So he wants the recovery
mechanism to restore the symlinks.
Um, wouldn't accepting this patch break the above-mentioned
tablespace-relocation feature, because pg_basebackup wouldn't see any
more symlinks sent down?
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sat, Nov 15, 2014 at 2:21 AM, Peter Eisentraut <peter_e@gmx.net> wrote:
On 11/13/14 4:33 PM, Peter Eisentraut wrote:
Is this still relevant after this commit?
commit fb05f3ce83d225dd0f39f8860ce04082753e9e98
Author: Peter Eisentraut <peter_e@gmx.net>
Date: Sat Feb 22 13:38:06 2014 -0500
pg_basebackup: Add support for relocating tablespaces
I believe so.
The commit only applies to "plain" output. Amit's complaint is that tar
utilities on Windows don't unpack symlinks, so the "tar" format isn't
useful on Windows when tablespaces are used. So he wants the recovery
mechanism to restore the symlinks.
Um, wouldn't accepting this patch break the above-mentioned
tablespace-relocation feature, because pg_basebackup wouldn't see any
more symlinks sent down?
No, the new feature is implemented only for tar format and above feature
works only with plain format. It will still send the symlink information as
previously for plain format.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Sat, Nov 15, 2014 at 12:03 AM, Alvaro Herrera <alvherre@2ndquadrant.com>
wrote:
Amit Kapila wrote:
2. Symlink file format:
<oid> <linkpath>
16387 E:\PostgreSQL\tbs
Symlink file will contain entries for all the tablespaces
under pg_tblspc directory. I have kept the file name as
symlink_label (suggestion are welcome if you want some
different name for this file).
I think symlink_label isn't a very good name. This file is not a label
in the sense that backup_label is; it seems more a "catalog" to me. And
it's not, in essence, about symlinks either, but rather about
tablespaces. I would name it following the term "tablespace catalog" or
some variation thereof.
This file is going to provide the symlink path for each tablespace, so
it would not be bad to have that in the file name. I agree with you that it's more
about tablespaces. So how about:
tablespace_symlink
symlink_tablespace
tablespace_info
I know we don't expect that users would have to look at the file or edit
it in normal cases, but it seems better to make it be human-readable. I
would think that the file needs to have tablespace names too, then, not
just OIDs. Maybe we don't use the tablespace name for anything other
than "documentation" purposes if someone decides to look at the file, so
perhaps it should look like a comment:
<oid> <link path> ; <tablespace name>
We already do this in pg_restore -l output IIRC.
Okay, I will take care of doing this in next version of patch if no one
objects to this idea or more people are in favour of doing so.
One use case mentioned upthread is having the clone be created in the
same machine as the source server. Does your proposal help with it?
Sorry, but I am not getting which proposal exactly you are referring here,
Could you explain in more detail?
In general, if user took the backup (in tar format) using pg_basebackup,
this
patch will be able to restore such a backup even on the same server.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Sat, Nov 15, 2014 at 12:01 AM, Robert Haas <robertmhaas@gmail.com> wrote:
On Fri, Nov 14, 2014 at 1:15 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Generally I'd be in favor of avoiding platform-dependent code where
possible, but that doesn't represent a YES vote for this particular
patch. It looks pretty messy in a quick look, even granting that the
#ifdef WIN32's would all go away.
Hmm, OK. I have not read the patch. Hopefully that's something that
could be fixed.
A larger question here is about forward/backward compatibility of the
basebackup files. Changing the representation of symlinks like this
would break that. Maybe we don't care, not sure (is there already a
catversion check for these things?). Changing the file format for only
some platforms seems like definitely a bad idea though.
What are the practical consequences of changing the file format? I
think that an old backup containing symlinks could be made to work on
a new server that knows how to create them,
So if I understand correctly, by *old backup* you mean backup created
by 9.5 and by *new server*, you mean server > 9.5, if yes the current
design should handle it.
However if the backup is created on version < 9.5 using pg_basebackup
of same version and trying to restore it with server >=9.5 won't work,
because server won't have the information about symlinks.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Amit Kapila wrote:
On Sat, Nov 15, 2014 at 12:03 AM, Alvaro Herrera <alvherre@2ndquadrant.com>
wrote:
Amit Kapila wrote:
I think symlink_label isn't a very good name. This file is not a label
in the sense that backup_label is; it seems more a "catalog" to me. And
it's not, in essence, about symlinks either, but rather about
tablespaces. I would name it following the term "tablespace catalog" or
some variation thereof.
This file is going to provide the symlink path for each tablespace, so
it not be bad to have that in file name. I agree with you that it's more
about tablespaces. So how about:
tablespace_symlink
symlink_tablespace
tablespace_info
I think the fact that we use symlinks is an implementation detail;
aren't they actually junction points, not symlinks, in some Windows
cases? The pg_tablespace catalog uses (or used to use)
"spclocation" for this, not "spcsymlink".
One use case mentioned upthread is having the clone be created in the
same machine as the source server. Does your proposal help with it?
Sorry, but I am not getting which proposal exactly you are referring here,
Could you explain in more detail?
In the first message of this thread[1], Noah said:
: A "pg_basebackup -Fp" running on the same system as the target cluster will
: fail in the presence of tablespaces; it would backup each tablespace to its
: original path, and those paths are in use locally for the very originals we're
: copying.
In general, if user took the backup (in tar format) using pg_basebackup,
this
patch will be able to restore such a backup even on the same server.
I must be misunderstanding either you or Noah.
[1]: /messages/by-id/20130801161519.GA334956@tornado.leadboat.com
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sun, Nov 16, 2014 at 6:15 AM, Alvaro Herrera <alvherre@2ndquadrant.com>
wrote:
Amit Kapila wrote:
On Sat, Nov 15, 2014 at 12:03 AM, Alvaro Herrera <
alvherre@2ndquadrant.com>
wrote:
Amit Kapila wrote:
I think symlink_label isn't a very good name. This file is not a
label
in the sense that backup_label is; it seems more a "catalog" to me.
And
it's not, in essence, about symlinks either, but rather about
tablespaces. I would name it following the term "tablespace catalog"
or
some variation thereof.
This file is going to provide the symlink path for each tablespace, so
it not be bad to have that in file name. I agree with you that it's
more
about tablespaces. So how about:
tablespace_symlink
symlink_tablespace
tablespace_info
I think the fact that we use symlinks is an implementation detail;
aren't them actually junction points, not symlinks, in some Windows
cases?
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more people.
In case, nobody else has any preference, I will change it to what both
of us can agree upon (either 'tablespace catalog', 'tablespace_info' ...).
One use case mentioned upthread is having the clone be created in the
same machine as the source server. Does your proposal help with it?
Sorry, but I am not getting which proposal exactly you are referring
here,
Could you explain in more detail?
In the first message of this thread[1], Noah said:
: A "pg_basebackup -Fp" running on the same system as the target cluster
will
: fail in the presence of tablespaces; it would backup each tablespace to
its
: original path, and those paths are in use locally for the very
originals we're
: copying.
That use case got addressed with -T option with which user can relocate
tablespace directory (Commit: fb05f3c; pg_basebackup: Add support for
relocating tablespaces)
In general, if user took the backup (in tar format) using pg_basebackup,
this
patch will be able to restore such a backup even on the same server.
I must be misunderstanding either you or Noah.
Does the above information address your question?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Amit Kapila wrote:
On Sun, Nov 16, 2014 at 6:15 AM, Alvaro Herrera <alvherre@2ndquadrant.com>
wrote:
I think the fact that we use symlinks is an implementation detail;
aren't them actually junction points, not symlinks, in some Windows
cases?
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more people.
In case, nobody else has any preference, I will change it to what both
of us can agree upon (either 'tablespace catalog', 'tablespace_info' ...).
Well, I have made my argument. Since you're the submitter, feel free to
select what you think is the best name.
One use case mentioned upthread is having the clone be created in the
same machine as the source server. Does your proposal help with it?
That use case got addressed with -T option with which user can relocate
tablespace directory (Commit: fb05f3c; pg_basebackup: Add support for
relocating tablespaces)
Okay. As far as I know, -T only works for plain mode, right? I wonder
if we should make -T modify the tablespace catalog, so that the
resulting file in tar output outputs names mangled per the map; that
would make -T work in tar mode too. Does that make sense? (Maybe it
already works that way? I didn't research.)
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Nov 18, 2014 at 7:49 PM, Alvaro Herrera <alvherre@2ndquadrant.com>
wrote:
Amit Kapila wrote:
On Sun, Nov 16, 2014 at 6:15 AM, Alvaro Herrera <
alvherre@2ndquadrant.com>
wrote:
One use case mentioned upthread is having the clone be created in
the
same machine as the source server. Does your proposal help with
it?
That use case got addressed with -T option with which user can relocate
tablespace directory (Commit: fb05f3c; pg_basebackup: Add support for
relocating tablespaces)
Okay. As far as I know, -T only works for plain mode, right?
Yes.
I wonder
if we should make -T modify the tablespace catalog, so that the
resulting file in tar output outputs names mangled per the map; that
would make -T work in tar mode too. Does that make sense?
tablespace catalog (I assume it is new file you are talking about) is
formed on the server where as handling for -T is completely in
pg_basebackup, we might be able to make it work, but I am not
sure if it is worth because the main usecase for -T option is for plain
format. I think even if there is some use case for -T to work with tar
format, it is a separate project.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Tue, Nov 18, 2014 at 9:19 AM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more people.
In case, nobody else has any preference, I will change it to what both
of us can agree upon (either 'tablespace catalog', 'tablespace_info' ...).
Well, I have made my argument. Since you're the submitter, feel free to
select what you think is the best name.
For what it's worth, I, too, dislike having symlink in the name.
Maybe "tablespace_map"?
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Nov 19, 2014 at 11:46 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Tue, Nov 18, 2014 at 9:19 AM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more people.
In case, nobody else has any preference, I will change it to what both
of us can agree upon (either 'tablespace catalog', 'tablespace_info'
...).
Well, I have made my argument. Since you're the submitter, feel free to
select what you think is the best name.
For what it's worth, I, too, dislike having symlink in the name.
Maybe "tablespace_map"?
Sounds good to me as well.
To summarize the situation of this patch, I have received below comments
on which I am planning to work:
1. Change the name of file containing tablespace path information.
2. Store tablespace name as well along with oid and path to make the
information Human readable.
3. Make the code generic (Remove #ifdef Win32 macro's and change
comments referring this functionality for windows and see if any more
changes are required to make it work on linux.)
Now the part where I would like to receive feedback before revising the
patch is on the coding style. It seems to me from Tom's comments that
he is not happy with the code, now I am not sure which part of the patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
I have attached a rebased (on top of commit-8d7af8f) patch, just in case
someone wants to apply and check it.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v4.patchapplication/octet-stream; name=extend_basebackup_to_include_symlink_v4.patchDownload
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 07ca0dc..8c15d62 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -834,8 +834,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. On Windows, this function also creates a
+ <firstterm>symlink label</> file, called <filename>symlink_label</>,
+ in the cluster directory with information about symbolic links in
+ <filename>pg_tblspc/</>. Both the files are critical to the integrity of
+ the backup, should you need to restore from it.
</para>
<para>
@@ -963,17 +966,19 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes a file named <filename>backup_label</> and on windows another
+ file named <filename>symlink_label</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The symlink label file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link. These files
+ are not merely for your information; their presence and contents are critical
+ to the proper operation of the system's recovery process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 90a3460..798a574 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16278,7 +16278,8 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
+ writes a backup label file (<filename>backup_label</>) and in case of
+ windows symlink label file (<filename>symlink_label</>) also into the
database cluster's data directory, performs a checkpoint,
and then returns the backup's starting transaction log location as text.
The user can ignore this result value, but it is
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6053127..0b1c094 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -41,6 +41,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -813,6 +814,9 @@ static void xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+#ifdef WIN32
+static bool read_symlink_label(List **tablespaces);
+#endif
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5528,6 +5532,9 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+#ifdef WIN32
+ bool haveSymlinkLabel = false;
+#endif
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -5602,16 +5609,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -5680,6 +5677,9 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+#ifdef WIN32
+ List *tablespaces = NIL;
+#endif
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -5724,6 +5724,61 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+#ifdef WIN32
+ /* read the symlink file if present and create symlinks */
+ if (read_symlink_label(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities do it that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveSymlinkLabel = true;
+ }
+#endif
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -5798,6 +5853,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc, however for windows the symlinks are
+ * created only after reading symlink file in case of archive recovery
+ * from backup, so we need to clear old relcache files here after creating
+ * symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6048,6 +6117,25 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+#ifdef WIN32
+ /*
+ * If there was a symlink label file, it's done its job and the
+ * symlinks have been created. We must get rid of the label file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveSymlinkLabel)
+ {
+ unlink(SYMLINK_LABEL_OLD);
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9060,16 +9148,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and symlink label files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/symlink_label, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup, symlink label files are not written to disk. Instead,
+ * their would-be contents are returned in *labelfile and *symlinkfile, and
+ * the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'symlink_label'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * symlinkfile is required only for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9082,7 +9175,8 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **symlinkfile, bool infotbssize)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9096,6 +9190,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+#ifdef WIN32
+ StringInfoData symlinkfbuf;
+#endif
backup_started_in_recovery = RecoveryInProgress();
@@ -9167,6 +9264,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9286,6 +9386,82 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLByteToSeg(startpoint, _logSegNo);
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
+#ifdef WIN32
+ /*
+ * Construct symlink file
+ */
+ initStringInfo(&symlinkfbuf);
+#endif
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(linkpath);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+#ifdef WIN32
+ appendStringInfo(&symlinkfbuf, "%s %s\n", ti->oid, ti->path);
+#endif
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
/*
* Construct backup label file
*/
@@ -9349,9 +9525,55 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+#ifdef WIN32
+ /* Write backup symlink file. */
+ if (symlinkfbuf.len > 0)
+ {
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ SYMLINK_LABEL_FILE)));
+
+ fp = AllocateFile(SYMLINK_LABEL_FILE, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ if (fwrite(symlinkfbuf.data, symlinkfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ }
+
+ pfree(symlinkfbuf.data);
+#endif
}
else
+ {
*labelfile = labelfbuf.data;
+#ifdef WIN32
+ if (symlinkfbuf.len > 0)
+ *symlinkfile = symlinkfbuf.data;
+#endif
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -9523,6 +9745,14 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+#ifdef WIN32
+ /*
+ * Remove symlink file if present, symlink file is created
+ * only if there are tablespaces.
+ */
+ unlink(SYMLINK_LABEL_FILE);
+#endif
}
/*
@@ -9924,6 +10154,74 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
return true;
}
+#ifdef WIN32
+/*
+ * read_symlink_label: load tablespace link entries from the symlink_label file
+ *
+ * Opens SYMLINK_LABEL_FILE. If the file does not exist, returns FALSE and
+ * leaves *tablespaces untouched; any other open failure is FATAL. Otherwise
+ * each "<oid> <linkpath>" line is appended to *tablespaces as a palloc'd
+ * tablespaceinfo with only oid and path filled in, and TRUE is returned.
+ *
+ * A malformed line or a read error is reported at FATAL.
+ */
+static bool
+read_symlink_label(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char ch;
+ char tbsoid[MAXPGPATH];
+ char tbslinkpath[MAXPGPATH];
+
+ /*
+ * Try to open the symlink file; ENOENT simply means there is none
+ */
+ lfp = AllocateFile(SYMLINK_LABEL_FILE, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Parse one "<oid> <linkpath>" pair per '\n'-terminated line.
+ * NOTE(review): "%s" is unbounded (a token >= MAXPGPATH bytes overruns the
+ * buffers) and splits on whitespace, so paths with spaces are rejected.
+ */
+ while (!feof(lfp))
+ {
+ ti = palloc(sizeof(tablespaceinfo));
+ if (fscanf(lfp, "%s %s%c", tbsoid, tbslinkpath, &ch) != 3 || ch != '\n')
+ {
+ if (feof(lfp))
+ break; /* clean EOF; this pass's ti goes unused */
+ else
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", SYMLINK_LABEL_FILE)));
+ }
+
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ SYMLINK_LABEL_FILE)));
+
+ return true;
+}
+#endif
+
/*
* Error context callback for errors occurring during rm_redo().
*/
@@ -9957,11 +10255,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"symlink_label" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "symlink_label" file exists, it will be renamed to
+ * "symlink_label.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -9990,6 +10291,31 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+#ifdef WIN32
+ /* if the file is not there, return */
+ if (stat(SYMLINK_LABEL_FILE, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(SYMLINK_LABEL_OLD);
+
+ if (rename(SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ SYMLINK_LABEL_FILE, SYMLINK_LABEL_OLD)));
+ }
+#endif
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 133143d..d8864d9 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..8925148 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendsymlinkfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendsymlinkinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *symlinkfile = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &symlinkfile, opt->progress);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +138,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +155,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +206,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send symlink file if required and then the bulk of the
+ * files
+ */
+ if (symlinkfile && opt->sendsymlinkfile)
+ {
+ sendFileWithContent(SYMLINK_LABEL_FILE, symlinkfile);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +496,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_sendsymlinkfile = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +567,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * symlinkfile is required only for tar format in windows as
+ * native windows utilities are not able to create symlinks while
+ * extracting files from tar.
+ */
+ if (o_sendsymlinkfile)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendsymlinkfile = true;
+ o_sendsymlinkfile = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +809,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +843,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +855,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendsymlinkinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * symlink_label file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendsymlinkinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +890,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or symlink file, it belongs to a backup
+ * started by the user with pg_start_backup(). It is *not* correct for
+ * this backup, our backup_label/symlink is injected into the tar
+ * separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, SYMLINK_LABEL_FILE) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1026,6 +991,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
/* Allow symbolic links in pg_tblspc only */
if (strcmp(path, "./pg_tblspc") == 0 &&
+ sendsymlinkinfo &&
#ifndef WIN32
S_ISLNK(statbuf.st_mode)
#else
@@ -1100,7 +1066,7 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
}
}
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, true);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index e7c2939..0b0ebb6 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1636,13 +1636,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 6f8b5f4..73e25ef 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogrecord.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -247,7 +249,8 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **symlinkfile, bool infotbssize);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -256,4 +259,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define SYMLINK_LABEL_FILE "symlink_label"
+#define SYMLINK_LABEL_OLD "symlink_label.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On 11/20/2014 02:27 AM, Amit Kapila wrote:
On Wed, Nov 19, 2014 at 11:46 PM, Robert Haas <robertmhaas@gmail.com
<mailto:robertmhaas@gmail.com>> wrote:
On Tue, Nov 18, 2014 at 9:19 AM, Alvaro Herrera
<alvherre@2ndquadrant.com <mailto:alvherre@2ndquadrant.com>> wrote:
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more people.
In case, nobody else has any preference, I will change it to what both
of us can agree upon (either 'tablespace catalog',
'tablespace_info' ...).
Well, I have made my argument. Since you're the submitter, feel
free to
select what you think is the best name.
For what it's worth, I, too, dislike having symlink in the name.
Maybe "tablespace_map"?
Sounds good to me as well.
To summarize the situation of this patch, I have received below comments
on which I am planning to work:
1. Change the name of file containing tablespace path information.
2. Store tablespace name as well along with oid and path to make the
information Human readable.
3. Make the code generic (Remove #ifdef Win32 macro's and change
comments referring this functionality for windows and see if any more
changes are required to make it work on linux.)
Now the part where I would like to receive feedback before revising the
patch is on the coding style. It seems to me from Tom's comments that
he is not happy with the code, now I am not sure which part of the patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
I have attached a rebased (on top of commit-8d7af8f) patch, just in case
someone wants to apply and check it.
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Andrew Dunstan <andrew@dunslane.net> writes:
On 11/20/2014 02:27 AM, Amit Kapila wrote:
Now the part where I would like to receive feedback before revising the
patch is on the coding style. It seems to me from Tom's comments that
he is not happy with the code, now I am not sure which part of the patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
Sorry, I was not paying very close attention to this thread and missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
2. There seems to be more going on here than what is advertised, eg
why do we need to add an option to the BASE_BACKUP command (and if
we do need it, doesn't it need to be documented in protocol.sgml)?
And why is the RelationCacheInitFileRemove call relocated?
3. Not terribly happy with the changes made to the API of
do_pg_start_backup, eg having to be able to parse "DIR *" in its
arguments seems like a lot of #include creep. xlog.h is pretty
central so I'm not happy about plastering more #includes in it.
4. In the same vein, publicly declaring a struct with a name as
generic as "tablespaceinfo" doesn't seem like a great idea, when
its usage is far from generic.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Andrew Dunstan <andrew@dunslane.net> writes:
On 11/20/2014 02:27 AM, Amit Kapila wrote:
Now the part where I would like to receive feedback before revising the
patch is on the coding style. It seems to me from Tom's comments that
he is not happy with the code, now I am not sure which part of the
patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
Sorry, I was not paying very close attention to this thread and missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
I will look into this and see what best can be done.
2. There seems to be more going on here than what is advertised, eg
why do we need to add an option to the BASE_BACKUP command
This is to ensure that symlink file is generated only for tar format;
server is not aware of whether the backup is generated for plain format
or tar format. We don't want to do it for plain format as for that
client (pg_basebackup) can update the symlinks via -T option and backing
up symlink file during that operation can lead to spurious symlinks after
archive recovery. I have given the reason why we want to accomplish it
only for tar format in my initial mail.
(and if
we do need it, doesn't it need to be documented in protocol.sgml)?
I shall take care of it in next version of patch.
And why is the RelationCacheInitFileRemove call relocated?
Because it assumes that tablespace directory pg_tblspc is in
place and it tries to remove the files by reading pg_tblspc
directory as well. Now as we setup the symlinks in pg_tblspc
after reading symlink file, so we should remove relcache init
file once the symlinks are setup in pg_tblspc directory.
3. Not terribly happy with the changes made to the API of
do_pg_start_backup, eg having to be able to parse "DIR *" in its
arguments seems like a lot of #include creep. xlog.h is pretty
central so I'm not happy about plastering more #includes in it.
The reason of adding new include in xlog.c is for use of tablespaceinfo
structure which I have now kept in basebackup.h.
The reason why I have done this way is because do_pg_start_backup has
some functionality common to both non-exclusive and exclusive backups and
for this feature we have to do some work common for both non-exclusive
and exclusive backup which is to generate the symlink label file for
non-exclusive backups and write the symlink label file for exclusive
backups using that information. Doing this way seems right to me
as we are already doing something like that for backup label file.
Another possible way could be to write a new function in xlogutils.c
to do the symlink label stuff and then use the same in xlog.c, I think
that way we could avoid any new include in xlog.c. However for this we
need to have include in xlogutils.c to make it aware of tablespaceinfo
structure.
4. In the same vein, publicly declaring a struct with a name as
generic as "tablespaceinfo" doesn't seem like a great idea, when
its usage is far from generic.
This is related to above point, we need to use this for both
non-exclusive and exclusive backups and the work for exclusive
backups is done outside of basebackup.c due to which we need
to expose the same.
Any other better idea to address points 3 and 4?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Sun, Dec 14, 2014 at 11:54 AM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Andrew Dunstan <andrew@dunslane.net> writes:
On 11/20/2014 02:27 AM, Amit Kapila wrote:
Now the part where I would like to receive feedback before revising
the
patch is on the coding style. It seems to me from Tom's comments
that
he is not happy with the code, now I am not sure which part of the
patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
Sorry, I was not paying very close attention to this thread and missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
I will look into this and see what best can be done.
One way to deal with this could be to append a delimiter(which is not
allowed
in tablespace path like quote (\')) at the end of tablespace path while
writing the same to symlink label file and then use that as end marker while
reading it from file. I think that might defeat the human readable aspect
of
this file to an extent, but I am not sure if it is too important to keep it
human readable. I think even if we want to provide some information to
user from internal files, it is always better to provide it via some
utility/tool.
Do we support newline in tablespace path?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Amit Kapila wrote:
One way to deal with this could be to append a delimiter(which is not
allowed
in tablespace path like quote (\')) at the end of tablespace path while
writing the same to symlink label file and then use that as end marker while
reading it from file.
Some GNU tools such as xargs and find use a null char as item delimiter;
see find -print0 and xargs -0. IIRC one of our tools also allow that
(psql?). Doing the same here would make human reading a bit more
difficult, but not completely impossible.
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Amit Kapila <amit.kapila16@gmail.com> writes:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
One way to deal with this could be to append a delimiter(which is not
allowed
in tablespace path like quote (\')) at the end of tablespace path while
writing the same to symlink label file and then use that as end marker while
reading it from file.
What makes you think quote isn't allowed in tablespace paths? Even if we
were to disallow it at the SQL level, there'd be nothing stopping a DBA
from changing the path after the fact by redefining the symlink outside
SQL --- something I believe we specifically meant to allow, considering
we went to the trouble of getting rid of the pg_tablespace.spclocation
column.
Pretty much the only character we can be entirely certain is not in a
symlink's value is \0. As Alvaro mentioned, using that in the file
is a possible alternative, although it could easily confuse some users
and/or text editors. The only other alternatives I can see are:
* Go over to a byte-count-then-value format. Also possible, also rather
unfriendly from a user's standpoint.
* Establish an escaping convention, eg backslash before any funny
characters. Unfortunately backslash wouldn't be too nice from the
viewpoint of Windows users.
* Make pg_basebackup check for and fail on symlinks containing characters
it can't handle. Pretty icky, though I suppose there's some argument
that things like newlines wouldn't be in any rational tablespace path.
But I doubt you can make that argument for spaces, quotes, or backslashes.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 12/14/2014 07:09 PM, Tom Lane wrote:
Amit Kapila <amit.kapila16@gmail.com> writes:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
One way to deal with this could be to append a delimiter (which is not
allowed
in tablespace path like quote (\')) at the end of tablespace path while
writing the same to symlink label file and then use that as end marker while
reading it from file.
What makes you think quote isn't allowed in tablespace paths? Even if we
were to disallow it at the SQL level, there'd be nothing stopping a DBA
from changing the path after the fact by redefining the symlink outside
SQL --- something I believe we specifically meant to allow, considering
we went to the trouble of getting rid of the pg_tablespace.spclocation
column.
Pretty much the only character we can be entirely certain is not in a
symlink's value is \0. As Alvaro mentioned, using that in the file
is a possible alternative, although it could easily confuse some users
and/or text editors. The only other alternatives I can see are:
* Go over to a byte-count-then-value format. Also possible, also rather
unfriendly from a user's standpoint.
* Establish an escaping convention, eg backslash before any funny
characters. Unfortunately backslash wouldn't be too nice from the
viewpoint of Windows users.
* Make pg_basebackup check for and fail on symlinks containing characters
it can't handle. Pretty icky, though I suppose there's some argument
that things like newlines wouldn't be in any rational tablespace path.
But I doubt you can make that argument for spaces, quotes, or backslashes.
Using an escaping convention makes by far the most sense to me. It's
what occurred to me earlier today even before I read the above. We could
adopt the URL convention of %xx for escapable characters - that would
avoid \ nastiness.
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Mon, Dec 15, 2014 at 5:39 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Amit Kapila <amit.kapila16@gmail.com> writes:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human
readability
of the file.
One way to deal with this could be to append a delimiter(which is not
allowed
in tablespace path like quote (\')) at the end of tablespace path while
writing the same to symlink label file and then use that as end marker
while
reading it from file.
What makes you think quote isn't allowed in tablespace paths?
Below part of code makes me think that quote is not allowed.
Oid
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
..
/* disallow quotes, else CREATE DATABASE would be at risk */
if (strchr(location, '\''))
ereport(ERROR,
(errcode(ERRCODE_INVALID_NAME),
errmsg("tablespace location cannot contain single quotes")));
}
Even if we
were to disallow it at the SQL level, there'd be nothing stopping a DBA
from changing the path after the fact by redefining the symlink outside
SQL --- something I believe we specifically meant to allow, considering
we went to the trouble of getting rid of the pg_tablespace.spclocation
column.
Pretty much the only character we can be entirely certain is not in a
symlink's value is \0. As Alvaro mentioned, using that in the file
is a possible alternative, although it could easily confuse some users
and/or text editors. The only other alternatives I can see are:
* Go over to a byte-count-then-value format. Also possible, also rather
unfriendly from a user's standpoint.
* Establish an escaping convention, eg backslash before any funny
characters. Unfortunately backslash wouldn't be too nice from the
viewpoint of Windows users.
* Make pg_basebackup check for and fail on symlinks containing characters
it can't handle. Pretty icky, though I suppose there's some argument
that things like newlines wouldn't be in any rational tablespace path.
Yeah, another thing is that during tablespace creation, we use below
code to form tablespace path which prompted me to ask question that
do we allow newline in create tablespace path.
create_tablespace_directories()
{
..
location_with_version_dir = psprintf("%s/%s", location,
TABLESPACE_VERSION_DIRECTORY);
..
}
Now if above code understand newline in path, then can't we make
some arrangement during file read?
But I doubt you can make that argument for spaces, quotes, or
backslashes.
If we disallow newline in symlink path via pg_basebackup path, then we
might be able to use 'Negated scanset' format specifier of fscanf
("%[^\n]s")
to handle other characters.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Amit Kapila <amit.kapila16@gmail.com> writes:
On Mon, Dec 15, 2014 at 5:39 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
What makes you think quote isn't allowed in tablespace paths?
Below part of code makes me think that quote is not allowed.
Oid
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
..
/* disallow quotes, else CREATE DATABASE would be at risk */
if (strchr(location, '\''))
ereport(ERROR,
(errcode(ERRCODE_INVALID_NAME),
errmsg("tablespace location cannot contain single quotes")));
}
Hm, I think that's left over from defending a *very* ancient version
of CREATE DATABASE. In any case, as I mentioned, any limitations
we might be putting on tablespace paths during SQL-level operation
are pretty much a dead letter.
regards, tom lane
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sat, Dec 13, 2014 at 9:41 PM, Andrew Dunstan <andrew@dunslane.net> wrote:
On 11/20/2014 02:27 AM, Amit Kapila wrote:
On Wed, Nov 19, 2014 at 11:46 PM, Robert Haas <robertmhaas@gmail.com
<mailto:robertmhaas@gmail.com>> wrote:
On Tue, Nov 18, 2014 at 9:19 AM, Alvaro Herrera
<alvherre@2ndquadrant.com <mailto:alvherre@2ndquadrant.com>> wrote:
Right, but they provide same functionality as symlinks and now we
are even planning to provide this feature for both linux and windows
as
both Tom and Robert seems to feel, it's better that way. Anyhow,
I think naming any entity generally differs based on individual's
perspective, so we can go with the name which appeals to more
people.
In case, nobody else has any preference, I will change it to what
both
of us can agree upon (either 'tablespace catalog',
'tablespace_info' ...).
Well, I have made my argument. Since you're the submitter, feel
free to
select what you think is the best name.
For what it's worth, I, too, dislike having symlink in the name.
Maybe "tablespace_map"?
Sounds good to me as well.
To summarize the situation of this patch, I have received below comments
on which I am planning to work:
1. Change the name of file containing tablespace path information.
2. Store tablespace name as well along with oid and path to make the
information Human readable.
3. Make the code generic (Remove #ifdef Win32 macro's and change
comments referring this functionality for windows and see if any more
changes are required to make it work on linux.)
Now the part where I would like to receive feedback before revising the
patch is on the coding style. It seems to me from Tom's comments that
he is not happy with the code, now I am not sure which part of the patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
I have attached a rebased (on top of commit-8d7af8f) patch, just in case
someone wants to apply and check it.
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
I am working on fixing the review comments, but I think I won't be
able to handle all as still there is discussion going on for some of
the comments, but I am intended to work on it for CF starting today.
So I have moved this patch to CF (2014-12).
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Sun, Dec 14, 2014 at 11:54 AM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Andrew Dunstan <andrew@dunslane.net> writes:
On 11/20/2014 02:27 AM, Amit Kapila wrote:
Now the part where I would like to receive feedback before revising
the
patch is on the coding style. It seems to me from Tom's comments
that
he is not happy with the code, now I am not sure which part of the
patch
he thinks needs change. Tom if possible, could you be slightly more
specific about your concern w.r.t code?
In view of the request above for comments from Tom, I have moved this
back to "Needs Review".
I have updated the patch and handled the review comments as below:
1. Change the name of file containing tablespace path information to
tablespace_map. I have changed the reference to file name in whole patch.
2. I have not added tablespace name in tablespace_map file as I am not
sure how important it is for user readability aspect and what format should
we use and another point is not many people have asked for it. However
if you feel it is important to have the same for this patch, then I will
propose some new format.
3. Made the code generic (for all platforms) such that a tablespace_map
file will be created to restore tablespaces for base backup.
Sorry, I was not paying very close attention to this thread and missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human readability
of the file.
I will look into this and see what best can be done.
I have chosen #3 (Make pg_basebackup check for and fail on symlinks
containing characters (currently newline only) it can't handle) from the
different options suggested by Tom. This keeps the format same as
previous and human readable.
2. There seems to be more going on here than what is advertised, eg
why do we need to add an option to the BASE_BACKUP command
This is to ensure that symlink file is generated only for tar format;
server is not aware of whether the backup is generated for plain format
or tar format. We don't want to do it for plain format as for that
client (pg_basebackup) can update the symlinks via -T option and backing
up symlink file during that operation can lead to spurious symlinks after
archive recovery. I have given the reason why we want to accomplish it
only for tar format in my initial mail.
(and if
we do need it, doesn't it need to be documented in protocol.sgml)?
I shall take care of it in next version of patch.
Added the description in protocol.sgml
And why is the RelationCacheInitFileRemove call relocated?
Because it assumes that tablespace directory pg_tblspc is in
place and it tries to remove the files by reading pg_tblspc
directory as well. Now as we setup the symlinks in pg_tblspc
after reading symlink file, so we should remove relcache init
file once the symlinks are setup in pg_tblspc directory.
3. Not terribly happy with the changes made to the API of
do_pg_start_backup, eg having to be able to parse "DIR *" in its
arguments seems like a lot of #include creep. xlog.h is pretty
central so I'm not happy about plastering more #includes in it.
The reason of adding new include in xlog.c is for use of tablespaceinfo
structure which I have now kept in basebackup.h.
The reason why I have done this way is because do_pg_start_backup has
some functionality common to both non-exclusive and exclusive backups and
for this feature we have to do some work common for both non-exclusive
and exclusive backup which is to generate the symlink label file for
non-exclusive backups and write the symlink label file for exclusive
backups using that information. Doing this way seems right to me
as we are already doing something like that for backup label file.
Another possible way could be to write a new function in xlogutils.c
to do the symlink label stuff and then use the same in xlog.c, I think
that way we could avoid any new include in xlog.c. However for this we
need to have include in xlogutils.c to make it aware of tablespaceinfo
structure.
Are you okay with the alternative I have suggested to avoid the
new include in xlog.c or do you feel the alternative will make the
code worse than the current patch?
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v5.patch (application/octet-stream; name=extend_basebackup_to_include_symlink_v5.patch) Download
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 07ca0dc..8741fae 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -834,8 +834,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. This function also creates a <firstterm>tablespace map</> file,
+ called <filename>tablespace_map</>, in the cluster directory with
+ information about tablespace symbolic links in <filename>pg_tblspc/</>
+ if one or more such links are present. Both files are critical to the
+ integrity of the backup, should you need to restore from it.
</para>
<para>
@@ -963,17 +966,19 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes the files named <filename>backup_label</> and
+ <filename>tablespace_map</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link. These files
+ are not merely for your information; their presence and contents are critical
+ to the proper operation of the system's recovery process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ef69b94..ee0a417 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16402,11 +16402,11 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
- database cluster's data directory, performs a checkpoint,
- and then returns the backup's starting transaction log location as text.
- The user can ignore this result value, but it is
- provided in case it is useful.
+ writes a backup label file (<filename>backup_label</>) and tablespace map
+ file (<filename>tablespace_map</>) into the database cluster's data
+ directory, performs a checkpoint, and then returns the backup's starting
+ transaction log location as text. The user can ignore this result value,
+ but it is provided in case it is useful.
<programlisting>
postgres=# select pg_start_backup('label_goes_here');
pg_start_backup
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 705bddb..2b59324 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1882,8 +1882,8 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
- <indexterm><primary>BASE_BACKUP</primary></indexterm>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TAR</literal>]
+ <indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
<para>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>TAR</literal></term>
+ <listitem>
+ <para>
+ Include the information about symbolic links present in directory
+ <filename>pg_tblspc</filename> in a file named
+ <filename>tablespace_map</filename>. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0f09add..0fc7311 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -814,6 +815,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5596,6 +5599,7 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+ bool haveTblspcMap = false;
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -5670,16 +5674,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -5748,6 +5742,8 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+ List *tablespaces = NIL;
+
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -5792,6 +5788,59 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink, if any, and create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to the actual path; some tar utilities do it that way while
+ * extracting symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveTblspcMap = true;
+ }
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -5866,6 +5915,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc; however, the symlinks are created
+ * only after reading the tablespace_map file in case of archive recovery
+ * from backup, so we need to clear old relcache files here, after
+ * creating the symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6122,6 +6185,23 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+ /*
+ * If there was a tablespace_map file, it's done its job and the
+ * symlinks have been created. We must get rid of the map file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveTblspcMap)
+ {
+ unlink(TABLESPACE_MAP_OLD);
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9179,16 +9259,22 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However for consistency, the same is used for all platforms.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9201,7 +9287,8 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **tblspcmapfile, bool infotbssize)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9215,6 +9302,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData tblspc_mapfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9286,6 +9374,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9406,6 +9497,93 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct tablespace_map file
+ */
+ initStringInfo(&tblspc_mapfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * skip the link path containing newline as we don't support the
+ * same in tablespace_map file. If we want to sacrifice the human
+ * readability aspect of tablespace_map file, then we can support
+ * paths containing newline in tablespace_map file, however any
+ * rational tablespace path wouldn't contain things like newline.
+ */
+ if (strchr(linkpath, '\n') || strchr(linkpath, '\r'))
+ {
+ ereport(WARNING,
+ (errmsg("skipping symbolic link \"%s\" containing newline",
+ linkpath)));
+ continue;
+ }
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(linkpath);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9468,9 +9646,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+ /* Write backup tablespace_map file. */
+ if (tblspc_mapfbuf.len > 0)
+ {
+ if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ TABLESPACE_MAP)));
+
+ fp = AllocateFile(TABLESPACE_MAP, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ TABLESPACE_MAP)));
+ if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+
+ pfree(tblspc_mapfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+ if (tblspc_mapfbuf.len > 0)
+ *tblspcmapfile = tblspc_mapfbuf.data;
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -9641,6 +9861,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+ /*
+ * Remove tablespace_map file if present, it is created
+ * only if there are tablespaces.
+ */
+ unlink(TABLESPACE_MAP);
}
/*
@@ -10041,6 +10267,73 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
}
/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char ch;
+ char tbsoid[MAXPGPATH];
+ char tbslinkpath[MAXPGPATH];
+
+ /*
+ * See if tablespace_map file is present
+ */
+ lfp = AllocateFile(TABLESPACE_MAP, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from tablespace_map file
+ * (this code is pretty crude, but we are not expecting any variability
+ * in the file format).
+ */
+ while (!feof(lfp))
+ {
+ ti = palloc(sizeof(tablespaceinfo));
+ if (fscanf(lfp, "%s %[^\n]%c", tbsoid, tbslinkpath, &ch) != 3 || ch != '\n')
+ {
+ if (feof(lfp))
+ break;
+ else
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+ }
+
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+
+ return true;
+}
+
+/*
* Error context callback for errors occurring during rm_redo().
*/
static void
@@ -10071,11 +10364,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"tablespace_map" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10104,6 +10400,29 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+ /* if the file is not there, return */
+ if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(TABLESPACE_MAP_OLD);
+
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 133143d..d8864d9 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..7069eac 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendtblspcmapfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendtblspcmapinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *tblspc_map_file = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &tblspc_map_file, opt->progress);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +138,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +155,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +206,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send tablespace_map file if required and then the bulk of
+ * the files.
+ */
+ if (tblspc_map_file && opt->sendtblspcmapfile)
+ {
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +496,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_tar = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +567,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * The tablespace map file is required only for tar format, mainly
+ * on Windows, as native Windows utilities are not able to create
+ * symlinks while extracting files from tar.
+ */
+ if (o_tar)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendtblspcmapfile = true;
+ o_tar = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +809,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +843,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +855,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspcmapinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * tablespace_map file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendtblspcmapinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +890,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not*
+ * correct for this backup, our backup_label/tablespace_map is injected
+ * into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1099,8 +1064,16 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
break;
}
}
+
+ /*
+ * Skip sending directories inside pg_tblspc when sendtblspcmapinfo
+ * is false (the link information goes in tablespace_map instead).
+ */
+ if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspcmapinfo)
+ skip_this_dir = true;
+
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspcmapinfo);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index e7c2939..0b0ebb6 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1636,13 +1636,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d06fbc0..9699c00 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -249,7 +251,8 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **tblspcmapfile, bool infotbssize);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -258,4 +261,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define TABLESPACE_MAP "tablespace_map"
+#define TABLESPACE_MAP_OLD "tablespace_map.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On Tue, Dec 16, 2014 at 12:58 PM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Sun, Dec 14, 2014 at 11:54 AM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
Sorry, I was not paying very close attention to this thread and missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human
readability
of the file.
I will look into this and see what best can be done.
I have chosen #3 (Make pg_basebackup check for and fail on symlinks
containing characters (currently newline only) it can't handle) from the
different options suggested by Tom. This keeps the format same as
previous and human readable.
Actually, here instead of an error a warning is issued and that particular
path (containing new line) will be skipped. This is similar to what
is already done for the cases when there is any problem in reading
link paths.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On 12/16/2014 04:34 AM, Amit Kapila wrote:
On Tue, Dec 16, 2014 at 12:58 PM, Amit Kapila <amit.kapila16@gmail.com
<mailto:amit.kapila16@gmail.com>> wrote:
On Sun, Dec 14, 2014 at 11:54 AM, Amit Kapila
<amit.kapila16@gmail.com <mailto:amit.kapila16@gmail.com>> wrote:
On Sat, Dec 13, 2014 at 10:48 PM, Tom Lane <tgl@sss.pgh.pa.us
<mailto:tgl@sss.pgh.pa.us>> wrote:
Sorry, I was not paying very close attention to this thread and
missed
the request for comments. A few such:
1. The patch is completely naive about what might be in the symlink
path string; eg embedded spaces in the path would break it. On at
least some platforms, newlines could be in the path as well. I'm not
sure about how to guard against this while maintaining human
readability
of the file.
I will look into this and see what best can be done.
I have chosen #3 (Make pg_basebackup check for and fail on symlinks
containing characters (currently newline only) it can't handle) from the
different options suggested by Tom. This keeps the format same as
previous and human readable.
Actually, here instead of an error a warning is issued and that particular
path (containing new line) will be skipped. This is similar to what
is already done for the cases when there is any problem in reading
link paths.
I'm not clear why human readability is the major criterion here. As for
that, it will be quite difficult for a human to distinguish a name with
a space at the end from one without. I really think a simple encoding
scheme would be much the best. For normal cases it will preserve
readability completely, and for special cases it will preserve lack of
any ambiguity.
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 12/16/2014 06:30 PM, Andrew Dunstan wrote:
I'm not clear why human readability is the major criterion here. As for
that, it will be quite difficult for a human to distinguish a name with
a space at the end from one without. I really think a simple encoding
scheme would be much the best. For normal cases it will preserve
readability completely, and for special cases it will preserve lack of
any ambiguity.
Agreed. Besides, this:
16387 E:\\Program\ Files\\PostgreSQL\\tbs
is almost as human-readable as this:
16387 E:\Program Files\PostgreSQL\tbs
It's obvious how the escaping works, just by looking at the file.
- Heikki
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Dec 16, 2014 at 10:11 PM, Heikki Linnakangas <
hlinnakangas@vmware.com> wrote:
On 12/16/2014 06:30 PM, Andrew Dunstan wrote:
I'm not clear why human readability is the major criterion here. As for
that, it will be quite difficult for a human to distinguish a name with
a space at the end from one without. I really think a simple encoding
scheme would be much the best.
Yeah that could work, but we need the special encoding mainly for newline,
others would work with the current patch. However it might be worth doing
it for all kinds of spaces. Currently it just reads the line up to newline
using
fscanf, but if we use special encoding, we might need to read the file
character by character and check for newline without backslash(or other
special encoding character); do you have something like that in mind?
Another thing is that we need to take care that we encode/decode link
path for tar format, as plain format might already be working.
Agreed. Besides, this:
16387 E:\\Program\ Files\\PostgreSQL\\tbs
is almost as human-readable as this:
16387 E:\Program Files\PostgreSQL\tbs
Yeah, that looks okay, but if there are multiple spaces like below,
then it might be slightly inconvenient.
16422 E:\WorkSpace\PostgreSQL\master\tbs 2 3
16422 E:\\WorkSpace\\PostgreSQL\\master\tbs\ \ \ \ \ \ \ \ \ \ \ \
\ \ 2\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 3
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On Wed, Dec 17, 2014 at 11:32 AM, Amit Kapila <amit.kapila16@gmail.com>
wrote:
On Tue, Dec 16, 2014 at 10:11 PM, Heikki Linnakangas <
hlinnakangas@vmware.com> wrote:
On 12/16/2014 06:30 PM, Andrew Dunstan wrote:
I'm not clear why human readability is the major criterion here. As for
that, it will be quite difficult for a human to distinguish a name with
a space at the end from one without. I really think a simple encoding
scheme would be much the best.
Yeah that could work, but we need the special encoding mainly for newline,
others would work with the current patch. However it might be worth doing
it for all kinds of spaces. Currently it just reads the line up to newline
using
fscanf, but if we use special encoding, we might need to read the file
character by character and check for newline without backslash(or other
special encoding character); do you have something like that in mind?
Another thing is that we need to take care that we encode/decode link
path for tar format, as plain format might already be working.
Attached patch handles the newline and other characters that are allowed
in tablespace path, as we need escape character only for newline, I have
added the same only for newline. So after patch, the tablespace_map
file will look like below for different kind of paths, as you can see for
tablespace id 16393 which contains newline, there is additional escape
sequence "\" before each newline, whereas other paths containing spaces
work as they are.
16391 /home/akapila/mywork/workspace_pg/master/tbs1
16393 /home/akapila/mywork/workspace_pg/master/tbs\
a\
b\
16392 /home/akapila/mywork/workspace_pg/master/tbs 2
So with this, I have handled all review comments raised for this patch
and it is ready for review, as the status of this patch is changed from
"Ready for Committer" to "Waiting on Author", so ideally I think it
should go back to "Ready for Committer", however as I am not very sure
about this point, I will change it to "Needs Review" (correct me if I am
wrong).
Summarization of latest changes:
1. Change file name from symlink_label to tablespace_map and changed
the same every where in comments and variable names.
2. This feature will be supported for both windows and linux;
tablespace_map
file will be generated on both windows and linux to restore tablespace links
during archive recovery.
3. Handling for special characters in tablespace path name.
4. Updation of docs.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v6.patch (application/octet-stream; name=extend_basebackup_to_include_symlink_v6.patch) [Download]
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 07ca0dc..8741fae 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -834,8 +834,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. This function also creates a <firstterm>tablespace map</> file,
+ called <filename>tablespace_map</>, in the cluster directory with
+ information about tablespace symbolic links in <filename>pg_tblspc/</>
+ even if one such link is present. Both the files are critical to the
+ integrity of the backup, should you need to restore from it.
</para>
<para>
@@ -963,17 +966,19 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes the files named <filename>backup_label</> and
+ <filename>tablespace_map</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link. These files
+ are not merely for your information; their presence and contents are critical
+ to the proper operation of the system's recovery process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ef69b94..ee0a417 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16402,11 +16402,11 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
- database cluster's data directory, performs a checkpoint,
- and then returns the backup's starting transaction log location as text.
- The user can ignore this result value, but it is
- provided in case it is useful.
+ writes a backup label file (<filename>backup_label</>) and tablespace map
+ file (<filename>tablespace_map</>) into the database cluster's data
+ directory, performs a checkpoint, and then returns the backup's starting
+ transaction log location as text. The user can ignore this result value,
+ but it is provided in case it is useful.
<programlisting>
postgres=# select pg_start_backup('label_goes_here');
pg_start_backup
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 705bddb..2b59324 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1882,8 +1882,8 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
- <indexterm><primary>BASE_BACKUP</primary></indexterm>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TAR</literal>]
+ <indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
<para>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>TAR</literal></term>
+ <listitem>
+ <para>
+ Include the information about symbolic links present in directory
+ <filename>pg_tblspc</filename> in a file named
+ <filename>tablespace_map</filename>. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0f09add..5894d28 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
@@ -814,6 +815,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5596,6 +5599,7 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+ bool haveTblspcMap = false;
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -5670,16 +5674,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -5748,6 +5742,8 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+ List *tablespaces = NIL;
+
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -5792,6 +5788,59 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities does that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveTblspcMap = true;
+ }
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -5866,6 +5915,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files needs to be removed from all
+ * directories including pg_tblspc, however the symlinks are created
+ * only after reading tablespace_map file in case of archive recovery
+ * from backup, so needs to clear old relcache files here after creating
+ * symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6122,6 +6185,23 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+ /*
+ * If there was a tablespace_map file, it's done its job and the
+ * symlinks have been created. We must get rid of the map file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveTblspcMap)
+ {
+ unlink(TABLESPACE_MAP_OLD);
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9179,16 +9259,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However for consistency, the same is used for all platforms.
+ *
+ * needtblspcmapfile is true for the cases (exclusive backup and for
+ * non-exclusive backup only when tar format is used for taking backup)
+ * when backup needs to generate tablespace_map file, it is used to
+ * embed escape character before newline character in tablespace path.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9201,7 +9292,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9215,6 +9308,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData tblspc_mapfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9286,6 +9380,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9406,6 +9503,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct tablespace_map file
+ */
+ initStringInfo(&tblspc_mapfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+ StringInfoData buflinkpath;
+ char *s = linkpath;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Add the escape character '\\' before newline in a string
+ * to ensure that we can distinguish between the newline in
+ * the tablespace path and end of line while reading
+ * tablespace_map file during archive recovery.
+ */
+ initStringInfo(&buflinkpath);
+
+ while (*s)
+ {
+ if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+ appendStringInfoChar(&buflinkpath, '\\');
+ appendStringInfoChar(&buflinkpath, *s++);
+ }
+
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(buflinkpath.data);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+
+ pfree(buflinkpath.data);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9468,9 +9657,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+ /* Write backup tablespace_map file. */
+ if (tblspc_mapfbuf.len > 0)
+ {
+ if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ TABLESPACE_MAP)));
+
+ fp = AllocateFile(TABLESPACE_MAP, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ TABLESPACE_MAP)));
+ if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+
+ pfree(tblspc_mapfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+ if (tblspc_mapfbuf.len > 0)
+ *tblspcmapfile = tblspc_mapfbuf.data;
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -9641,6 +9872,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+ /*
+ * Remove tablespace_map file if present, it is created
+ * only if there are tablespaces.
+ */
+ unlink(TABLESPACE_MAP);
}
/*
@@ -10041,6 +10278,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
}
/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char tbsoid[MAXPGPATH];
+ char *tbslinkpath;
+ char str[MAXPGPATH];
+ int ch, prev_ch = -1,
+ i = 0, n;
+
+ /*
+ * See if tablespace_map file is present
+ */
+ lfp = AllocateFile(TABLESPACE_MAP, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from tablespace_map file
+ * (this code is pretty crude, but we are not expecting any variability
+ * in the file format). While taking backup we embed escape character
+ * '\\' before newline in tablespace path, so that during reading of
+ * tablespace_map file, we could distinguish newline in tablespace path
+ * and end of line. Now while reading tablespace_map file, remove the
+ * escape character that has been added in tablespace path during backup.
+ */
+ while ((ch = fgetc(lfp)) != EOF)
+ {
+ if ((ch == '\n' || ch == '\r') && prev_ch != '\\')
+ {
+ str[i] = '\0';
+ if (sscanf(str, "%s %n", tbsoid, &n) != 1)
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+ tbslinkpath = str + n;
+ i = 0;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ continue;
+ }
+ else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
+ str[i-1] = '\n';
+ else
+ str[i++] = ch;
+ prev_ch = ch;
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+
+ return true;
+}
+
+/*
* Error context callback for errors occurring during rm_redo().
*/
static void
@@ -10071,11 +10388,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"tablespace_map" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10104,6 +10424,29 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+ /* if the file is not there, return */
+ if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(TABLESPACE_MAP_OLD);
+
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 133143d..7cb819f 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false, true);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fbcecbb..a902203 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendtblspcmapfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendtblspcmapinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *tblspc_map_file = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &tblspc_map_file,
+ opt->progress, opt->sendtblspcmapfile);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send tablespace_map file if required and then the bulk of
+ * the files.
+ */
+ if (tblspc_map_file && opt->sendtblspcmapfile)
+ {
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -555,6 +497,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_tar = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -625,6 +568,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tar") == 0)
+ {
+ /*
+ * tablespcmapfile is required only for tar format mainly in
+ * windows as native windows utilites are not able create symlinks
+ * while extracting files from tar.
+ */
+ if (o_tar)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendtblspcmapfile = true;
+ o_tar = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -853,7 +810,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -887,7 +844,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -899,9 +856,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspcmapinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * tablespace_map file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendtblspcmapinfo)
{
DIR *dir;
struct dirent *de;
@@ -929,13 +891,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not*
+ * correct for this backup, our backup_label/tablespace_map is injected
+ * into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1099,8 +1065,16 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
break;
}
}
+
+ /*
+ * skip sending directories inside pg_tblspc, unless tablespace_map
+ * is not sent.
+ */
+ if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspcmapinfo)
+ skip_this_dir = true;
+
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspcmapinfo);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 154aaac..25c7311 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TAR
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TAR]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TAR
+ {
+ $$ = makeDefElem("tar",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index a257124..e373d51 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TAR { return K_TAR; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 0470401..f175876 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1636,13 +1636,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TAR": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d06fbc0..af178d2 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -249,7 +251,9 @@ extern void SetWalWriterSleeping(bool sleeping);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -258,4 +262,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define TABLESPACE_MAP "tablespace_map"
+#define TABLESPACE_MAP_OLD "tablespace_map.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 988bce7..3540602 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On 20 December 2014 16:30, Amit Kapila Wrote,
Summary of the latest changes:
1. Changed the file name from symlink_label to tablespace_map and changed
the same everywhere in comments and variable names.
2. This feature will be supported on both Windows and Linux; the tablespace_map
file will be generated on both Windows and Linux to restore tablespace links
during archive recovery.
3. Handling of special characters in the tablespace path name.
4. Updated the docs.
I have not followed this patch for quite some time, but I have read all the threads regarding it and reviewed it from that perspective.
1. I have done the testing and the behavior is fine.
2. For handling special characters such as the newline character, I saw discussion mostly of two options:
a. Don’t support such tablespace names in the tablespace map file, and skip those tablespaces.
b. Add a ‘\’ character before each newline in the tablespace name.
You have selected option (b). I don’t see any problem with this because it maintains human readability; I just want to ask whether this is as per the consensus?
Other than that, the patch seems fine to me.
Regards,
Dilip
On Wed, Jan 7, 2015 at 10:45 AM, Dilip kumar <dilip.kumar@huawei.com> wrote:
On 20 December 2014 16:30, Amit Kapila Wrote,
Summarization of latest changes:
1. Change file name from symlink_label to tablespace_map and changed
the same every where in comments and variable names.
2. This feature will be supportted for both windows and linux;
tablespace_map
file will be generated on both windows and linux to restore tablespace
links
during archive recovery.
3. Handling for special characters in tablesapce path name.
4. Updation of docs.
I did not followed this patch for quite some time, I have seen all the
threads regarding this patch and reviewed from those perspective.
1. I have done the testing and behavior is fine
2. For handling special character like new line character, I saw
discussion mostly for two option,
a. Don’t support such table space name in tablespace map file and
skip those tablespace.
b. Add ‘\’ character when there is new line in the tablespace name.
And you have selected option 2, I don’t see any problem in this because
it is maintaining human readability, I just want ask is this as per the
consensus ?
Tom has spotted this problem and suggested 3 different options
to handle this issue, apart from above 2, third one is "Go over to
a byte-count-then-value format". Then Andrew and Heikki
supported/asked to follow option 2 (as is followed in patch) and no
one objected, so I used the same to fix the issue.
Based on above, I would say we have a consensus to follow this
approach.
Other than that patch seems fine to me..
Thanks for reviewing it.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On 07 January 2015 11:21, Amit Kapila Wrote,
Tom has spotted this problem and suggested 3 different options
to handle this issue, apart from above 2, third one is "Go over to
a byte-count-then-value format". Then Andrew and Heikki
supported/asked to follow option 2 (as is followed in patch) and no
one objected, so I used the same to fix the issue.
Based on above, I would say we have a consensus to follow this
approach.
Moved to Ready For Committer
Regards,
Dilip
On 12/20/2014 05:59 AM, Amit Kapila wrote:
On Wed, Dec 17, 2014 at 11:32 AM, Amit Kapila <amit.kapila16@gmail.com
<mailto:amit.kapila16@gmail.com>> wrote:
On Tue, Dec 16, 2014 at 10:11 PM, Heikki Linnakangas
<hlinnakangas@vmware.com <mailto:hlinnakangas@vmware.com>> wrote:
On 12/16/2014 06:30 PM, Andrew Dunstan wrote:
I'm not clear why human readability is the major criterion here.
As for
that, it will be quite difficult for a human to distinguish a
name with
a space at the end from one without. I really think a simple encoding
scheme would be much the best.
Yeah that could work, but we need the special encoding mainly for
newline,
others would work with current patch. However it might be worth to do
it for all kind of spaces. Currently it just reads the line up to newline using
fscanf, but if we use special encoding, we might need to read the file
character by character and check for newline without backslash (or other
special encoding character); do you have something like that in mind?
Another thing is that we need to take care that we encode/decode link
path for tar format, as plain format might already be working.
Attached patch handles the newline and other characters that are allowed
in tablespace path, as we need escape character only for newline, I have
added the same only for newline. So after patch, the tablespace_map
file will look like below for different kind of paths, as you can see for
tablespace id 16393 which contains newline, there is additional escape
sequence "\" before each newline where as other paths containing space
works as it is.
16391 /home/akapila/mywork/workspace_pg/master/tbs1
16393 /home/akapila/mywork/workspace_pg/master/tbs\
a\
b\
16392 /home/akapila/mywork/workspace_pg/master/tbs 2
So with this, I have handled all review comments raised for this patch
and it is ready for review, as the status of this patch is changed from
"Ready for Committer" to "Waiting on Author", so ideally I think it
should go back to "Ready for Committer", however as I am not very sure
about this point, I will change it to "Needs Review" (correct me if I am
wrong).
Summarization of latest changes:
1. Change file name from symlink_label to tablespace_map and changed
the same every where in comments and variable names.
2. This feature will be supportted for both windows and linux;
tablespace_map
file will be generated on both windows and linux to restore tablespace
links
during archive recovery.
3. Handling for special characters in tablesapce path name.
4. Updation of docs.
This generally looks good, but I have a couple of questions before I
commit it.
First, why is the new option for the BASE_BACKUP command of the
Streaming Replication protocol "TAR"? It seems rather misleading.
Shouldn't it be something like "TABLESPACEMAP"? I realize we ask for it
when pg_basebackup is operating in TAR format mode, but the backend has
no notion of that, does it? The only thing this does is trigger the
sending of the tablespace map, so surely that's what the protocol option
should suggest.
Second, these lines in xlog.c seem wrong:
else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
str[i-1] = '\n';
It looks to me like we should be putting ch in the string, not
arbitrarily transforming \r into \n.
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sun, May 10, 2015 at 6:01 AM, Andrew Dunstan <andrew@dunslane.net> wrote:
This generally looks good, but I have a couple of questions before I
commit it.
First, why is the new option for the BASE_BACKUP command of the
Streaming Replication protcol "TAR"? It seems rather misleading. Shouldn't
it be something like "TABLESPACEMAP"?
The reason to keep new option's name as TAR was that tablespace_map
was generated for that format type, but I agree with you that something
like "TABLESPACEMAP" suits better, so I have changed it to
"TABLESPACE_MAP". Putting '_' in name makes it somewhat consistent
with other names and filename it generates with this new option.
Second, these lines in xlog.c seem wrong:
else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
str[i-1] = '\n';
It looks to me like we should be putting ch in the string, not
arbitrarily transforming \r into \n.
You are right, I have changed it as per your suggestion.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
extend_basebackup_to_include_symlink_v7.patchapplication/octet-stream; name=extend_basebackup_to_include_symlink_v7.patchDownload
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index e25e0d0..aa83ce9 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -836,8 +836,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. This function also creates a <firstterm>tablespace map</> file,
+ called <filename>tablespace_map</>, in the cluster directory with
+ information about tablespace symbolic links in <filename>pg_tblspc/</>
+ if at least one such link is present. Both files are critical to the
+ integrity of the backup, should you need to restore from it.
</para>
<para>
@@ -965,17 +968,19 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes the files named <filename>backup_label</> and
+ <filename>tablespace_map</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link. These files
+ are not merely for your information; their presence and contents are critical
+ to the proper operation of the system's recovery process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index fb39731..6fde056 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16591,11 +16591,11 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
- database cluster's data directory, performs a checkpoint,
- and then returns the backup's starting transaction log location as text.
- The user can ignore this result value, but it is
- provided in case it is useful.
+ writes a backup label file (<filename>backup_label</>) and tablespace map
+ file (<filename>tablespace_map</>) into the database cluster's data
+ directory, performs a checkpoint, and then returns the backup's starting
+ transaction log location as text. The user can ignore this result value,
+ but it is provided in case it is useful.
<programlisting>
postgres=# select pg_start_backup('label_goes_here');
pg_start_backup
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index ac13d32..06362b9 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1882,8 +1882,8 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
- <indexterm><primary>BASE_BACKUP</primary></indexterm>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TABLESPACE_MAP</literal>]
+ <indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
<para>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>TABLESPACE_MAP</literal></term>
+ <listitem>
+ <para>
+ Include the information about symbolic links present in directory
+ <filename>pg_tblspc</filename> in a file named
+ <filename>tablespace_map</filename>. The tablespace map file includes
+ the symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of the symbolic link.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6f7e3bd9..b426571 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/origin.h"
@@ -824,6 +825,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5917,6 +5920,7 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+ bool haveTblspcMap = false;
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6002,16 +6006,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6080,6 +6074,8 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+ List *tablespaces = NIL;
+
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6124,6 +6120,59 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories/files corresponding
+ * to actual path, some tar utilities does that way while extracting
+ * symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveTblspcMap = true;
+ }
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6198,6 +6247,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc; however, the symlinks are created
+ * only after reading the tablespace_map file in case of archive recovery
+ * from backup, so we need to clear old relcache files here, after creating
+ * the symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6466,6 +6529,23 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+ /*
+ * If there was a tablespace_map file, it's done its job and the
+ * symlinks have been created. We must get rid of the map file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveTblspcMap)
+ {
+ unlink(TABLESPACE_MAP_OLD);
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9610,16 +9690,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However for consistency, the same is used for all platforms.
+ *
+ * needtblspcmapfile is true for the cases (exclusive backup and for
+ * non-exclusive backup only when tar format is used for taking backup)
+ * when backup needs to generate tablespace_map file, it is used to
+ * embed escape character before newline character in tablespace path.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9632,7 +9723,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9646,6 +9739,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData tblspc_mapfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9717,6 +9811,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9837,6 +9934,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct tablespace_map file
+ */
+ initStringInfo(&tblspc_mapfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+ StringInfoData buflinkpath;
+ char *s = linkpath;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Add the escape character '\\' before newline in a string
+ * to ensure that we can distinguish between the newline in
+ * the tablespace path and end of line while reading
+ * tablespace_map file during archive recovery.
+ */
+ initStringInfo(&buflinkpath);
+
+ while (*s)
+ {
+ if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+ appendStringInfoChar(&buflinkpath, '\\');
+ appendStringInfoChar(&buflinkpath, *s++);
+ }
+
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(buflinkpath.data);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+
+ pfree(buflinkpath.data);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9899,9 +10088,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+ /* Write backup tablespace_map file. */
+ if (tblspc_mapfbuf.len > 0)
+ {
+ if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ TABLESPACE_MAP)));
+
+ fp = AllocateFile(TABLESPACE_MAP, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ TABLESPACE_MAP)));
+ if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+
+ pfree(tblspc_mapfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+ if (tblspc_mapfbuf.len > 0)
+ *tblspcmapfile = tblspc_mapfbuf.data;
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10072,6 +10303,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+ /*
+ * Remove tablespace_map file if present, it is created
+ * only if there are tablespaces.
+ */
+ unlink(TABLESPACE_MAP);
}
/*
@@ -10472,6 +10709,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
}
/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char tbsoid[MAXPGPATH];
+ char *tbslinkpath;
+ char str[MAXPGPATH];
+ int ch, prev_ch = -1,
+ i = 0, n;
+
+ /*
+ * See if tablespace_map file is present
+ */
+ lfp = AllocateFile(TABLESPACE_MAP, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from tablespace_map file
+ * (this code is pretty crude, but we are not expecting any variability
+ * in the file format). While taking backup we embed escape character
+ * '\\' before newline in tablespace path, so that during reading of
+ * tablespace_map file, we could distinguish newline in tablespace path
+ * and end of line. Now while reading tablespace_map file, remove the
+ * escape character that has been added in tablespace path during backup.
+ */
+ while ((ch = fgetc(lfp)) != EOF)
+ {
+ if ((ch == '\n' || ch == '\r') && prev_ch != '\\')
+ {
+ str[i] = '\0';
+ if (sscanf(str, "%s %n", tbsoid, &n) != 1)
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+ tbslinkpath = str + n;
+ i = 0;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ continue;
+ }
+ else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
+ str[i-1] = ch;
+ else
+ str[i++] = ch;
+ prev_ch = ch;
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+
+ return true;
+}
+
+/*
* Error context callback for errors occurring during rm_redo().
*/
static void
@@ -10502,11 +10819,14 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label"/"tablespace_map" file to cancel
+ * backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old". Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10535,6 +10855,29 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+ /* if the file is not there, return */
+ if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(TABLESPACE_MAP_OLD);
+
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2179bf7..329bb8c 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false, true);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index de103c6..dba302b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendtblspcmapfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendtblspcmapinfo);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *tblspc_map_file = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &tblspc_map_file,
+ opt->progress, opt->sendtblspcmapfile);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send tablespace_map file if required and then the bulk of
+ * the files.
+ */
+ if (tblspc_map_file && opt->sendtblspcmapfile)
+ {
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -567,6 +509,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_tablespacemap = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -637,6 +580,20 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tablespace_map") == 0)
+ {
+ /*
+ * The tablespace_map file is required only for tar format, mainly on
+ * Windows, as native Windows utilities are not able to create symlinks
+ * while extracting files from tar.
+ */
+ if (o_tablespacemap)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendtblspcmapfile = true;
+ o_tablespacemap = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -865,7 +822,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -899,7 +856,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -911,9 +868,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspcmapinfo is false, we don't need to include symlink
+ * information inside tar file as it will be sent separately in
+ * tablespace_map file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendtblspcmapinfo)
{
DIR *dir;
struct dirent *de;
@@ -941,13 +903,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not*
+ * correct for this backup, our backup_label/tablespace_map is injected
+ * into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1120,8 +1086,16 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
break;
}
}
+
+ /*
+ * skip sending directories inside pg_tblspc, unless tablespace_map
+ * is not sent.
+ */
+ if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspcmapinfo)
+ skip_this_dir = true;
+
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspcmapinfo);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 7d6d154..a59a381 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TABLESPACE_MAP
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d] [TABLESPACE_MAP]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +169,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TABLESPACE_MAP
+ {
+ $$ = makeDefElem("tablespace_map",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index f8acb66..056cc14 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TABLESPACE_MAP { return K_TABLESPACE_MAP; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 83bf2f5..2d0ea7b 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1652,13 +1652,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TABLESPACE_MAP": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f08b676..961e050 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -258,7 +260,9 @@ extern void assign_checkpoint_completion_target(double newval, void *extra);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -267,4 +271,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define TABLESPACE_MAP "tablespace_map"
+#define TABLESPACE_MAP_OLD "tablespace_map.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 64f2bd5..7d3d09e 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+typedef struct
+{
+ char *oid;
+ char *path;
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On 05/11/2015 02:02 AM, Amit Kapila wrote:
On Sun, May 10, 2015 at 6:01 AM, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:This generally looks good, but I have a couple of questions before I
commit it.
First, why is the new option for the BASE_BACKUP command of the
Streaming Replication protocol "TAR"? It seems rather misleading.
Shouldn't it be something like "TABLESPACEMAP"?The reason to keep new option's name as TAR was that tablespace_map
was generated for that format type, but I agree with you that something
like "TABLESPACEMAP" suits better, so I have changed it to
"TABLESPACE_MAP". Putting '_' in name makes it somewhat consistent
with other names and filename it generates with this new option.Second, these lines in xlog.c seem wrong:
else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
str[i-1] = '\n';It looks to me like we should be putting ch in the string, not
arbitrarily transforming \r into \n.
You are right, I have changed it as per your suggestion.
OK, I have cleaned this up a bit - I had already started so I didn't
take your latest patch but instead applied relevant changes to my
changeset. Here is my latest version.
In testing I notice that now "pg_basebackup -F t" leaves it completely up
to the user on all platforms to create the relevant links in pg_tblspc/.
It includes the tablespace_map file in base.tar, but that's really just
informational. I think we need to add something to the pg_basebackup
docs about that, at the very least (and it will also need to be a
release note item.)
cheers
andrew
Attachments:
backup_tablespace_fix-ad.patchtext/x-patch; name=backup_tablespace_fix-ad.patchDownload
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index e25e0d0..def43a2 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -836,8 +836,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. The function also creates a <firstterm>tablespace map</> file,
+ called <filename>tablespace_map</>, in the cluster directory with
+ information about tablespace symbolic links in <filename>pg_tblspc/</>
+ if one or more such links are present. Both files are critical to the
+ integrity of the backup, should you need to restore from it.
</para>
<para>
@@ -965,17 +968,20 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes files named <filename>backup_label</> and
+ <filename>tablespace_map</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The tablespace map file includes
+ the symbolic link names as they exist in the directory
+ <filename>pg_tblspc/</> and the full path of each symbolic link.
+ These files are not merely for your information; their presence and
+ contents are critical to the proper operation of the system's recovery
+ process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index fb39731..24d43d9 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16591,11 +16591,12 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
- database cluster's data directory, performs a checkpoint,
- and then returns the backup's starting transaction log location as text.
- The user can ignore this result value, but it is
- provided in case it is useful.
+ writes a backup label file (<filename>backup_label</>) and, if there
+ are any links in the <filename>pg_tblspc/</> directory, a tablespace map
+ file (<filename>tablespace_map</>) into the database cluster's data
+ directory, performs a checkpoint, and then returns the backup's starting
+ transaction log location as text. The user can ignore this result value,
+ but it is provided in case it is useful.
<programlisting>
postgres=# select pg_start_backup('label_goes_here');
pg_start_backup
@@ -16610,7 +16611,8 @@ postgres=# select pg_start_backup('label_goes_here');
</para>
<para>
- <function>pg_stop_backup</> removes the label file created by
+ <function>pg_stop_backup</> removes the label file and, if it exists,
+ the <filename>tablespace_map</> file created by
<function>pg_start_backup</>, and creates a backup history file in
the transaction log archive area. The history file includes the label given to
<function>pg_start_backup</>, the starting and ending transaction log locations for
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index ac13d32..d985204 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1882,7 +1882,7 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TABLESPACE_MAP</literal>]
<indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>TABLESPACE_MAP</literal></term>
+ <listitem>
+ <para>
+ Include information about symbolic links present in the directory
+ <filename>pg_tblspc</filename> in a file named
+ <filename>tablespace_map</filename>. The tablespace map file includes
+ each symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of that symbolic link.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6f7e3bd..5f0551a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/origin.h"
@@ -824,6 +825,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5917,6 +5920,7 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+ bool haveTblspcMap = false;
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6002,16 +6006,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6080,6 +6074,8 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+ List *tablespaces = NIL;
+
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6124,6 +6120,59 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink if any and Create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories or files
+ * corresponding to the actual path. Some tar utilities do
+ * things that way while extracting symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveTblspcMap = true;
+ }
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6198,6 +6247,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc; however, the symlinks are created
+ * only after reading the tablespace_map file in case of archive recovery
+ * from backup, so we need to clear old relcache files here, after
+ * creating the symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6466,6 +6529,23 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+ /*
+ * If there was a tablespace_map file, it's done its job and the
+ * symlinks have been created. We must get rid of the map file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveTblspcMap)
+ {
+ unlink(TABLESPACE_MAP_OLD);
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9610,16 +9690,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However for consistency, the same is used for all platforms.
+ *
+ * needtblspcmapfile is true in the cases where the backup needs to
+ * generate a tablespace_map file (exclusive backups, and non-exclusive
+ * backups only when tar format is used); it controls whether an escape
+ * character is embedded before newline characters in the tablespace path.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9632,7 +9723,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9646,6 +9739,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData tblspc_mapfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9717,6 +9811,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9837,6 +9934,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct tablespace_map file
+ */
+ initStringInfo(&tblspc_mapfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+ StringInfoData buflinkpath;
+ char *s = linkpath;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Add the escape character '\\' before newline in a string
+ * to ensure that we can distinguish between the newline in
+ * the tablespace path and end of line while reading
+ * tablespace_map file during archive recovery.
+ */
+ initStringInfo(&buflinkpath);
+
+ while (*s)
+ {
+ if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+ appendStringInfoChar(&buflinkpath, '\\');
+ appendStringInfoChar(&buflinkpath, *s++);
+ }
+
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(buflinkpath.data);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+
+ pfree(buflinkpath.data);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9899,9 +10088,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+ /* Write backup tablespace_map file. */
+ if (tblspc_mapfbuf.len > 0)
+ {
+ if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ TABLESPACE_MAP)));
+
+ fp = AllocateFile(TABLESPACE_MAP, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ TABLESPACE_MAP)));
+ if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+
+ pfree(tblspc_mapfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+ if (tblspc_mapfbuf.len > 0)
+ *tblspcmapfile = tblspc_mapfbuf.data;
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10072,6 +10303,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+ /*
+ * Remove tablespace_map file if present, it is created
+ * only if there are tablespaces.
+ */
+ unlink(TABLESPACE_MAP);
}
/*
@@ -10472,6 +10709,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
}
/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Each logical line of the file has the form "<oid> <linkpath>".  A newline
+ * or carriage return that belongs to the link path is stored preceded by a
+ * backslash; a backslash in any other position is an ordinary character.
+ *
+ * For every entry found, a tablespaceinfo with oid and path set (all other
+ * fields zeroed) is appended to *tablespaces.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.  A read failure, an over-long line, an unparsable line or a final
+ * line without a terminator is reported with ereport(FATAL).
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+	tablespaceinfo *ti;
+	FILE	   *lfp;
+	char		tbsoid[MAXPGPATH];
+	char	   *tbslinkpath;
+	char		str[MAXPGPATH];
+	int			ch,
+				prev_ch = -1,
+				i = 0,
+				n;
+
+	/*
+	 * See if tablespace_map file is present
+	 */
+	lfp = AllocateFile(TABLESPACE_MAP, "r");
+	if (!lfp)
+	{
+		if (errno != ENOENT)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m",
+							TABLESPACE_MAP)));
+		return false;			/* it's not there, all is fine */
+	}
+
+	/*
+	 * Read and parse the link name and path lines from tablespace_map file
+	 * (this code is pretty crude, but we are not expecting any variability
+	 * in the file format).  While taking backup we embed escape character
+	 * '\\' before newline in tablespace path, so that during reading of
+	 * tablespace_map file, we could distinguish newline in tablespace path
+	 * and end of line.  Now while reading tablespace_map file, remove the
+	 * escape character that has been added in tablespace path during backup.
+	 */
+	while ((ch = fgetc(lfp)) != EOF)
+	{
+		if ((ch == '\n' || ch == '\r') && prev_ch != '\\')
+		{
+			/*
+			 * Nothing accumulated: this is a blank line, e.g. the '\n' of
+			 * a "\r\n" pair whose '\r' already ended the entry.  Skip it.
+			 */
+			if (i == 0)
+				continue;
+
+			/* i <= sizeof(str) - 1 is guaranteed by the check below */
+			str[i] = '\0';
+
+			/*
+			 * tbsoid cannot overflow: it is as large as str, and the token
+			 * copied into it is never longer than str's contents.
+			 */
+			if (sscanf(str, "%s %n", tbsoid, &n) != 1)
+				ereport(FATAL,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+			tbslinkpath = str + n;
+			i = 0;
+
+			/* zero the struct so the fields not set here are well defined */
+			ti = palloc0(sizeof(tablespaceinfo));
+			ti->oid = pstrdup(tbsoid);
+			ti->path = pstrdup(tbslinkpath);
+
+			*tablespaces = lappend(*tablespaces, ti);
+			continue;
+		}
+		else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
+		{
+			/*
+			 * Escaped line break: overwrite the backslash stored on the
+			 * previous iteration.  prev_ch can only be '\\' right after
+			 * that store, so i >= 1 here.
+			 */
+			str[i - 1] = ch;
+		}
+		else
+		{
+			/* keep one byte free for the terminating NUL */
+			if (i >= (int) sizeof(str) - 1)
+				ereport(FATAL,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+			str[i++] = ch;
+		}
+		prev_ch = ch;
+	}
+
+	if (ferror(lfp) || FreeFile(lfp))
+		ereport(FATAL,
+				(errcode_for_file_access(),
+				 errmsg("could not read file \"%s\": %m",
+						TABLESPACE_MAP)));
+
+	/*
+	 * Leftover characters mean the last entry had no line terminator.
+	 * Dropping it silently would lose a tablespace link, so complain.
+	 */
+	if (i != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+
+	return true;
+}
+
+/*
* Error context callback for errors occurring during rm_redo().
*/
static void
@@ -10502,11 +10819,16 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label" and "tablespace_map"
+ * files to cancel backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old".
+ *
+ * Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10535,6 +10857,29 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+ /* if the tablespace_map file is not there, return */
+ if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(TABLESPACE_MAP_OLD);
+
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2179bf7..329bb8c 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false, true);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index de103c6..b341ff6 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendtblspcmapfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendtblspclinks);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *tblspc_map_file = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &tblspc_map_file,
+ opt->progress, opt->sendtblspcmapfile);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send tablespace_map file if required and then the bulk of
+ * the files.
+ */
+ if (tblspc_map_file && opt->sendtblspcmapfile)
+ {
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -567,6 +509,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_tablespace_map = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -637,6 +580,15 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tablespace_map") == 0)
+ {
+ if (o_tablespace_map)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendtblspcmapfile = true;
+ o_tablespace_map = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -865,7 +817,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -899,7 +851,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -911,9 +863,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspclinks is true, we need to include symlink
+ * information in the tar file. If not, we can skip that
+ * as it will be sent separately in the tablespace_map file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendtblspclinks)
{
DIR *dir;
struct dirent *de;
@@ -941,13 +898,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not*
+ * correct for this backup, our backup_label/tablespace_map is injected
+ * into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1120,8 +1081,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
break;
}
}
+
+ /*
+ * skip sending directories inside pg_tblspc, if not required.
+ */
+ if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
+ skip_this_dir = true;
+
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 7d6d154..6a5ecc5 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TABLESPACE_MAP
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,8 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * [MAX_RATE %d] [TABLESPACE_MAP]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +170,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TABLESPACE_MAP
+ {
+ $$ = makeDefElem("tablespace_map",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index f8acb66..056cc14 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TABLESPACE_MAP { return K_TABLESPACE_MAP; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 83bf2f5..2d0ea7b 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1652,13 +1652,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TABLESPACE_MAP": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f08b676..961e050 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -258,7 +260,9 @@ extern void assign_checkpoint_completion_target(double newval, void *extra);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -267,4 +271,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define TABLESPACE_MAP "tablespace_map"
+#define TABLESPACE_MAP_OLD "tablespace_map.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 64f2bd5..7d3d09e 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+/*
+ * One entry in the list of tablespaces handled by a base backup.
+ *
+ * Entries for real tablespaces are built from the links found in
+ * pg_tblspc; the entry added for the base directory itself is
+ * zero-allocated and has only "size" filled in.
+ */
+typedef struct
+{
+ char *oid;	/* name of the entry under pg_tblspc (copied from the directory entry name) */
+ char *path;	/* target of the pg_tblspc link, as returned by readlink() */
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size;	/* size computed by sendTablespace()/sendDir(), or -1 when not requested */
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */
On Tue, May 12, 2015 at 5:50 AM, Andrew Dunstan <andrew@dunslane.net> wrote:
On 05/11/2015 02:02 AM, Amit Kapila wrote:
On Sun, May 10, 2015 at 6:01 AM, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:
This generally looks good, but I have a couple of questions before I
commit it.
First, why is the new option for the BASE_BACKUP command of the
Streaming Replication protocol "TAR"? It seems rather misleading. Shouldn't
it be something like "TABLESPACEMAP"?
The reason to keep new option's name as TAR was that tablespace_map
was generated for that format type, but I agree with you that something
like "TABLESPACEMAP" suits better, so I have changed it to
"TABLESPACE_MAP". Putting '_' in name makes it somewhat consistent
with other names and filename it generates with this new option.Second, these lines in xlog.c seem wrong:
else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
str[i-1] = '\n';It looks to me like we should be putting ch in the string, not
arbitrarily transforming \r into \n.
You are right, I have changed it as per your suggestion.
OK, I have cleaned this up a bit - I had already started so I didn't take
your latest patch but instead applied relevant changes to my changeset.
Here is my latest version.
In testing I notice that now "pg_basebackup -F t" leaves it completely up
to the user on all platforms to create the relevant links in pg_tblspc/. It
includes the tablespace_map file in base.tar, but that's really just
informational.
Sorry, but I am not able to follow your point. User don't need to create
the relevant links, they will get created during first startup (during
recovery)
from the backup. I have tested and it works both on Windows and Linux.
Refer below code of patch in xlog.c
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
..
I think we need to add something to the pg_basebackup docs about that, at
the very least (and it will also need to be a release note item.)
Currently, below lines in patch suggest that this file is required for
recovery.
Do you expect more information to be added?
+ These files are not merely for your information; their presence and
+ contents are critical to the proper operation of the system's recovery
+ process.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
On 05/11/2015 11:01 PM, Amit Kapila wrote:
On Tue, May 12, 2015 at 5:50 AM, Andrew Dunstan <andrew@dunslane.net
<mailto:andrew@dunslane.net>> wrote:On 05/11/2015 02:02 AM, Amit Kapila wrote:
On Sun, May 10, 2015 at 6:01 AM, Andrew Dunstan
<andrew@dunslane.net <mailto:andrew@dunslane.net>
<mailto:andrew@dunslane.net <mailto:andrew@dunslane.net>>> wrote:This generally looks good, but I have a couple of questions
before I commit it.
First, why is the new option for the BASE_BACKUP command of the
Streaming Replication protocol "TAR"? It seems rather misleading.
Shouldn't it be something like "TABLESPACEMAP"?The reason to keep new option's name as TAR was that tablespace_map
was generated for that format type, but I agree with you that something
like "TABLESPACEMAP" suits better, so I have changed it to
"TABLESPACE_MAP". Putting '_' in name makes it somewhat consistent
with other names and filename it generates with this new option.Second, these lines in xlog.c seem wrong:
else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
str[i-1] = '\n';It looks to me like we should be putting ch in the string, not
arbitrarily transforming \r into \n.
You are right, I have changed it as per your suggestion.
OK, I have cleaned this up a bit - I had already started so I didn't
take your latest patch but instead applied relevant changes to my
changeset. Here is my latest version. In testing I notice that now "pg_basebackup -F t" leaves it completely
up to the user on all platforms to create the relevant links in
pg_tblspc/. It includes the tablespace_map file in base.tar, but
that's really just informational.Sorry, but I am not able to follow your point. User don't need to create
the relevant links, they will get created during first startup (during
recovery)
from the backup. I have tested and it works both on Windows and Linux.Refer below code of patch in xlog.c
+ + /* read the tablespace_map file if present and create symlinks. */ + if (read_tablespace_map(&tablespaces)) + { ..I think we need to add something to the pg_basebackup docs about
that, at the very least (and it will also need to be a release note item.)
Currently, below lines in patch suggest that this file is required for
recovery.
Do you expect more information to be added?+ These files are not merely for your information; their presence and + contents are critical to the proper operation of the system's recovery + process.
Yes, sorry, I had a moment of brain fade yesterday. However, I think
we're a bit under-documented on the pg_basebackup page, regarding both
tar mode and tablespace_map (which isn't even mentioned).
And there is this which I noticed this morning: the --tablespace-mapping
feature of pg_basebackup seems to be quite broken in --format=tar mode -
it simply has no effect AFAICT. I assume it was broken before, but we
should either fix it (possibly hard) or disallow the combination (which
would be a pity).
I'm going to go ahead and commit this in the state I have it now,
because for the most part these are preexisting deficiencies.
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 05/12/2015 08:35 AM, Andrew Dunstan wrote:
Yes, sorry, I had a moment of brain fade yesterday. However, I think
we're a bit under-documented on the pg_basebackup page, regarding both
tar mode and tablespace_map (which isn't even mentioned).And there is this which I noticed this morning: the
--tablespace-mapping feature of pg_basebackup seems to be quite broken
in --format=tar mode - it simply has no effect AFAICT. I assume it was
broken before, but we should either fix it (possibly hard) or disallow
the combination (which would be a pity).I'm going to go ahead and commit this in the state I have it now,
because for the most part these are preexisting deficiencies.
One more thing: I think pg_basebackup will now not work in tar mode with
pre-9.5 servers, since it will be using an unrecognized option of the
BASE_BACKUP protocol command. If so that certainly needs to be
documented and release noted.
cheers
andrew
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, May 12, 2015 at 6:37 PM, Andrew Dunstan <andrew@dunslane.net> wrote:
On 05/12/2015 08:35 AM, Andrew Dunstan wrote:
Yes, sorry, I had a moment of brain fade yesterday. However, I think
we're a bit under-documented on the pg_basebackup page, regarding both tar
mode and tablespace_map (which isn't even mentioned).
And there is this which I noticed this morning: the --tablespace-mapping
feature of pg_basebackup seems to be quite broken in --format=tar mode - it
simply has no effect AFAICT. I assume it was broken before, but we should
either fix it (possibly hard) or disallow the combination (which would be a
pity).
I'm going to go ahead and commit this in the state I have it now,
because for the most part these are preexisting deficiencies.
One more thing: I think pg_basebackup will now not work in tar mode with
pre-9.5 servers, since it will be using an unrecognized option of the
BASE_BACKUP protocol command. If so that certainly needs to be documented
and release noted.
Yes, that's true and I have added the same in docs, updated
patch attached.
As a side note, I think we should have added the same for
--max-rate= option introduced in 9.4.
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com
Attachments:
backup_tablespace_fix-ad-v2.patchapplication/octet-stream; name=backup_tablespace_fix-ad-v2.patchDownload
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index e25e0d0..def43a2 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -836,8 +836,11 @@ SELECT pg_start_backup('label');
<function>pg_start_backup</> creates a <firstterm>backup label</> file,
called <filename>backup_label</>, in the cluster directory with
information about your backup, including the start time and label
- string. The file is critical to the integrity of the backup, should
- you need to restore from it.
+ string. The function also creates a <firstterm>tablespace map</> file,
+ called <filename>tablespace_map</>, in the cluster directory with
+ information about tablespace symbolic links in <filename>pg_tblspc/</>
+ if one or more such links are present. Both files are critical to the
+ integrity of the backup, should you need to restore from it.
</para>
<para>
@@ -965,17 +968,20 @@ SELECT pg_stop_backup();
<para>
It's also worth noting that the <function>pg_start_backup</> function
- makes a file named <filename>backup_label</> in the database cluster
- directory, which is removed by <function>pg_stop_backup</>.
- This file will of course be archived as a part of your backup dump file.
- The backup label file includes the label string you gave to
- <function>pg_start_backup</>, as well as the time at which
- <function>pg_start_backup</> was run, and the name of the starting WAL
- file. In case of confusion it is therefore possible to look inside a
- backup dump file and determine exactly which backup session the dump file
- came from. However, this file is not merely for your information; its
- presence and contents are critical to the proper operation of the system's
- recovery process.
+ makes files named <filename>backup_label</> and
+ <filename>tablespace_map</> in the database cluster directory,
+ which are removed by <function>pg_stop_backup</>. These files will of
+ course be archived as a part of your backup dump file. The backup label
+ file includes the label string you gave to <function>pg_start_backup</>,
+ as well as the time at which <function>pg_start_backup</> was run, and
+ the name of the starting WAL file. In case of confusion it is therefore
+ possible to look inside a backup dump file and determine exactly which
+ backup session the dump file came from. The tablespace map file includes
+ the symbolic link names as they exist in the directory
+ <filename>pg_tblspc/</> and the full path of each symbolic link.
+ These files are not merely for your information; their presence and
+ contents are critical to the proper operation of the system's recovery
+ process.
</para>
<para>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1ee4f63..bf8d72e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16591,11 +16591,12 @@ SELECT set_config('log_statement_stats', 'off', false);
<function>pg_start_backup</> accepts an
arbitrary user-defined label for the backup. (Typically this would be
the name under which the backup dump file will be stored.) The function
- writes a backup label file (<filename>backup_label</>) into the
- database cluster's data directory, performs a checkpoint,
- and then returns the backup's starting transaction log location as text.
- The user can ignore this result value, but it is
- provided in case it is useful.
+ writes a backup label file (<filename>backup_label</>) and, if there
+ are any links in the <filename>pg_tblspc/</> directory, a tablespace map
+ file (<filename>tablespace_map</>) into the database cluster's data
+ directory, performs a checkpoint, and then returns the backup's starting
+ transaction log location as text. The user can ignore this result value,
+ but it is provided in case it is useful.
<programlisting>
postgres=# select pg_start_backup('label_goes_here');
pg_start_backup
@@ -16610,7 +16611,8 @@ postgres=# select pg_start_backup('label_goes_here');
</para>
<para>
- <function>pg_stop_backup</> removes the label file created by
+ <function>pg_stop_backup</> removes the label file and, if it exists,
+ the <filename>tablespace_map</> file created by
<function>pg_start_backup</>, and creates a backup history file in
the transaction log archive area. The history file includes the label given to
<function>pg_start_backup</>, the starting and ending transaction log locations for
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index ac13d32..d985204 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1882,7 +1882,7 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>]
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>rate</replaceable>] [<literal>TABLESPACE_MAP</literal>]
<indexterm><primary>BASE_BACKUP</primary></indexterm>
</term>
<listitem>
@@ -1968,6 +1968,19 @@ The commands accepted in walsender mode are:
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>TABLESPACE_MAP</literal></term>
+ <listitem>
+ <para>
+ Include information about symbolic links present in the directory
+ <filename>pg_tblspc</filename> in a file named
+ <filename>tablespace_map</filename>. The tablespace map file includes
+ each symbolic link name as it exists in the directory
+ <filename>pg_tblspc/</> and the full path of that symbolic link.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 642fccf..2ef8c2b 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -590,7 +590,8 @@ PostgreSQL documentation
<para>
<application>pg_basebackup</application> works with servers of the same
or an older major version, down to 9.1. However, WAL streaming mode (-X
- stream) only works with server version 9.3 and later.
+ stream) only works with server version 9.3 and later, and tar format (-F
+ tar) only works with server version 9.5 and later.
</para>
</refsect1>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6f7e3bd..5f0551a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/origin.h"
@@ -824,6 +825,8 @@ static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
static void pg_start_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
static void rm_redo_error_callback(void *arg);
static int get_sync_bit(int method);
@@ -5917,6 +5920,7 @@ StartupXLOG(void)
bool wasShutdown;
bool reachedStopPoint = false;
bool haveBackupLabel = false;
+ bool haveTblspcMap = false;
XLogRecPtr RecPtr,
checkPointLoc,
EndOfLog;
@@ -6002,16 +6006,6 @@ StartupXLOG(void)
ValidateXLOGDirectoryStructure();
/*
- * Clear out any old relcache cache files. This is *necessary* if we do
- * any WAL replay, since that would probably result in the cache files
- * being out of sync with database reality. In theory we could leave them
- * in place if the database had been cleanly shut down, but it seems
- * safest to just remove them always and let them be rebuilt during the
- * first backend startup.
- */
- RelationCacheInitFileRemove();
-
- /*
* Initialize on the assumption we want to recover to the latest timeline
* that's active according to pg_control.
*/
@@ -6080,6 +6074,8 @@ StartupXLOG(void)
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
+ List *tablespaces = NIL;
+
/*
* Archive recovery was requested, and thanks to the backup label
* file, we know how far we need to replay to reach consistency. Enter
@@ -6124,6 +6120,59 @@ StartupXLOG(void)
errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
wasShutdown = false; /* keep compiler quiet */
}
+
+ /* read the tablespace_map file if present and create symlinks. */
+ if (read_tablespace_map(&tablespaces))
+ {
+ ListCell *lc;
+ struct stat st;
+
+ foreach(lc, tablespaces)
+ {
+ tablespaceinfo *ti = lfirst(lc);
+ char *linkloc;
+
+ linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+ /*
+ * Remove the existing symlink, if any, and create the symlink
+ * under PGDATA. We need to use rmtree instead of rmdir as
+ * the link location might contain directories or files
+ * corresponding to the actual path. Some tar utilities do
+ * things that way while extracting symlinks.
+ */
+ if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode))
+ {
+ if (!rmtree(linkloc,true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove directory \"%s\": %m",
+ linkloc)));
+ }
+ else
+ {
+ if (unlink(linkloc) < 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove symbolic link \"%s\": %m",
+ linkloc)));
+ }
+
+ if (symlink(ti->path, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(ti->oid);
+ pfree(ti->path);
+ pfree(ti);
+ }
+
+ /* set flag to delete it later */
+ haveTblspcMap = true;
+ }
+
/* set flag to delete it later */
haveBackupLabel = true;
}
@@ -6198,6 +6247,20 @@ StartupXLOG(void)
}
/*
+ * Clear out any old relcache cache files. This is *necessary* if we do
+ * any WAL replay, since that would probably result in the cache files
+ * being out of sync with database reality. In theory we could leave them
+ * in place if the database had been cleanly shut down, but it seems
+ * safest to just remove them always and let them be rebuilt during the
+ * first backend startup. These files need to be removed from all
+ * directories including pg_tblspc; however, the symlinks are created
+ * only after reading the tablespace_map file in case of archive recovery
+ * from backup, so we need to clear old relcache files here, after
+ * creating the symlinks.
+ */
+ RelationCacheInitFileRemove();
+
+ /*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
* the backup is not part of the history of the requested timeline.
@@ -6466,6 +6529,23 @@ StartupXLOG(void)
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+ /*
+ * If there was a tablespace_map file, it's done its job and the
+ * symlinks have been created. We must get rid of the map file
+ * so that if we crash during recovery, we don't create symlinks
+ * again. It seems prudent though to just rename the file out of
+ * the way rather than delete it completely.
+ */
+ if (haveTblspcMap)
+ {
+ unlink(TABLESPACE_MAP_OLD);
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) != 0)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+
/* Check that the GUCs used to generate the WAL allow recovery */
CheckRequiredParameterValues();
@@ -9610,16 +9690,27 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*
* There are two kind of backups: exclusive and non-exclusive. An exclusive
* backup is started with pg_start_backup(), and there can be only one active
- * at a time. The backup label file of an exclusive backup is written to
- * $PGDATA/backup_label, and it is removed by pg_stop_backup().
+ * at a time. The backup and tablespace map files of an exclusive backup are
+ * written to $PGDATA/backup_label and $PGDATA/tablespace_map, and they are
+ * removed by pg_stop_backup().
*
* A non-exclusive backup is used for the streaming base backups (see
* src/backend/replication/basebackup.c). The difference to exclusive backups
- * is that the backup label file is not written to disk. Instead, its would-be
- * contents are returned in *labelfile, and the caller is responsible for
- * including it in the backup archive as 'backup_label'. There can be many
- * non-exclusive backups active at the same time, and they don't conflict
- * with an exclusive backup either.
+ * is that the backup label and tablespace map files are not written to disk.
+ * Instead, their would-be contents are returned in *labelfile and *tblspcmapfile,
+ * and the caller is responsible for including them in the backup archive as
+ * 'backup_label' and 'tablespace_map'. There can be many non-exclusive backups
+ * active at the same time, and they don't conflict with an exclusive backup
+ * either.
+ *
+ * tblspcmapfile is required mainly for tar format in windows as native windows
+ * utilities are not able to create symlinks while extracting files from tar.
+ * However for consistency, the same is used for all platforms.
+ *
+ * needtblspcmapfile is true in the cases where the backup needs to
+ * generate a tablespace_map file (exclusive backups, and non-exclusive
+ * backups taken in tar format); it controls whether an escape character
+ * is embedded before each newline character in a tablespace path.
*
* Returns the minimum WAL position that must be present to restore from this
* backup, and the corresponding timeline ID in *starttli_p.
@@ -9632,7 +9723,9 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
*/
XLogRecPtr
do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
- char **labelfile)
+ char **labelfile, DIR *tblspcdir, List **tablespaces,
+ char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile)
{
bool exclusive = (labelfile == NULL);
bool backup_started_in_recovery = false;
@@ -9646,6 +9739,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
struct stat stat_buf;
FILE *fp;
StringInfoData labelfbuf;
+ StringInfoData tblspc_mapfbuf;
backup_started_in_recovery = RecoveryInProgress();
@@ -9717,6 +9811,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
{
bool gotUniqueStartpoint = false;
+ struct dirent *de;
+ tablespaceinfo *ti;
+ int datadirpathlen;
/*
* Force an XLOG file switch before the checkpoint, to ensure that the
@@ -9837,6 +9934,98 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo);
/*
+ * Construct tablespace_map file
+ */
+ initStringInfo(&tblspc_mapfbuf);
+
+ datadirpathlen = strlen(DataDir);
+
+ /* Collect information about all tablespaces */
+ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+ {
+ char fullpath[MAXPGPATH];
+ char linkpath[MAXPGPATH];
+ char *relpath = NULL;
+ int rllen;
+ StringInfoData buflinkpath;
+ char *s = linkpath;
+
+ /* Skip special stuff */
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+ rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+ if (rllen < 0)
+ {
+ ereport(WARNING,
+ (errmsg("could not read symbolic link \"%s\": %m",
+ fullpath)));
+ continue;
+ }
+ else if (rllen >= sizeof(linkpath))
+ {
+ ereport(WARNING,
+ (errmsg("symbolic link \"%s\" target is too long",
+ fullpath)));
+ continue;
+ }
+ linkpath[rllen] = '\0';
+
+ /*
+ * Add the escape character '\\' before newline in a string
+ * to ensure that we can distinguish between the newline in
+ * the tablespace path and end of line while reading
+ * tablespace_map file during archive recovery.
+ */
+ initStringInfo(&buflinkpath);
+
+ while (*s)
+ {
+ if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
+ appendStringInfoChar(&buflinkpath, '\\');
+ appendStringInfoChar(&buflinkpath, *s++);
+ }
+
+
+ /*
+ * Relpath holds the relative path of the tablespace directory
+ * when it's located within PGDATA, or NULL if it's located
+ * elsewhere.
+ */
+ if (rllen > datadirpathlen &&
+ strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+ IS_DIR_SEP(linkpath[datadirpathlen]))
+ relpath = linkpath + datadirpathlen + 1;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(de->d_name);
+ ti->path = pstrdup(buflinkpath.data);
+ ti->rpath = relpath ? pstrdup(relpath) : NULL;
+ ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+
+ if(tablespaces)
+ *tablespaces = lappend(*tablespaces, ti);
+
+ appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path);
+
+ pfree(buflinkpath.data);
+#else
+
+ /*
+ * If the platform does not have symbolic links, it should not be
+ * possible to have tablespaces - clearly somebody else created
+ * them. Warn about it and ignore.
+ */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif
+ }
+
+ /*
* Construct backup label file
*/
initStringInfo(&labelfbuf);
@@ -9899,9 +10088,51 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
errmsg("could not write file \"%s\": %m",
BACKUP_LABEL_FILE)));
pfree(labelfbuf.data);
+
+ /* Write backup tablespace_map file. */
+ if (tblspc_mapfbuf.len > 0)
+ {
+ if (stat(TABLESPACE_MAP, &stat_buf) != 0)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is already in progress"),
+ errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
+ TABLESPACE_MAP)));
+
+ fp = AllocateFile(TABLESPACE_MAP, "w");
+
+ if (!fp)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m",
+ TABLESPACE_MAP)));
+ if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 ||
+ fflush(fp) != 0 ||
+ pg_fsync(fileno(fp)) != 0 ||
+ ferror(fp) ||
+ FreeFile(fp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write file \"%s\": %m",
+ TABLESPACE_MAP)));
+ }
+
+ pfree(tblspc_mapfbuf.data);
}
else
+ {
*labelfile = labelfbuf.data;
+ if (tblspc_mapfbuf.len > 0)
+ *tblspcmapfile = tblspc_mapfbuf.data;
+ }
}
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
@@ -10072,6 +10303,12 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m",
BACKUP_LABEL_FILE)));
+
+ /*
+ * Remove the tablespace_map file if present; it is created
+ * only if there are tablespaces.
+ */
+ unlink(TABLESPACE_MAP);
}
/*
@@ -10472,6 +10709,86 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
}
/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Returns TRUE if a tablespace_map file was found (and fills the link
+ * information for all the tablespace links present in file); returns FALSE
+ * if not.
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+ tablespaceinfo *ti;
+ FILE *lfp;
+ char tbsoid[MAXPGPATH];
+ char *tbslinkpath;
+ char str[MAXPGPATH];
+ int ch, prev_ch = -1,
+ i = 0, n;
+
+ /*
+ * See if tablespace_map file is present
+ */
+ lfp = AllocateFile(TABLESPACE_MAP, "r");
+ if (!lfp)
+ {
+ if (errno != ENOENT)
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+ return false; /* it's not there, all is fine */
+ }
+
+ /*
+ * Read and parse the link name and path lines from tablespace_map file
+ * (this code is pretty crude, but we are not expecting any variability
+ * in the file format). While taking backup we embed escape character
+ * '\\' before newline in tablespace path, so that during reading of
+ * tablespace_map file, we could distinguish newline in tablespace path
+ * and end of line. Now while reading tablespace_map file, remove the
+ * escape character that has been added in tablespace path during backup.
+ */
+ while ((ch = fgetc(lfp)) != EOF)
+ {
+ if ((ch == '\n' || ch == '\r') && prev_ch != '\\')
+ {
+ str[i] = '\0';
+ if (sscanf(str, "%s %n", tbsoid, &n) != 1)
+ ereport(FATAL,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+ tbslinkpath = str + n;
+ i = 0;
+
+ ti = palloc(sizeof(tablespaceinfo));
+ ti->oid = pstrdup(tbsoid);
+ ti->path = pstrdup(tbslinkpath);
+
+ *tablespaces = lappend(*tablespaces, ti);
+ continue;
+ }
+ else if ((ch == '\n' || ch == '\r') && prev_ch == '\\')
+ str[i-1] = ch;
+ else
+ str[i++] = ch;
+ prev_ch = ch;
+ }
+
+ if (ferror(lfp) || FreeFile(lfp))
+ ereport(FATAL,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ TABLESPACE_MAP)));
+
+ return true;
+}
+
+/*
* Error context callback for errors occurring during rm_redo().
*/
static void
@@ -10502,11 +10819,16 @@ BackupInProgress(void)
}
/*
- * CancelBackup: rename the "backup_label" file to cancel backup mode
+ * CancelBackup: rename the "backup_label" and "tablespace_map"
+ * files to cancel backup mode
*
* If the "backup_label" file exists, it will be renamed to "backup_label.old".
- * Note that this will render an online backup in progress useless.
- * To correctly finish an online backup, pg_stop_backup must be called.
+ * Similarly, if the "tablespace_map" file exists, it will be renamed to
+ * "tablespace_map.old".
+ *
+ * Note that this will render an online backup in progress
+ * useless. To correctly finish an online backup, pg_stop_backup must be
+ * called.
*/
void
CancelBackup(void)
@@ -10535,6 +10857,29 @@ CancelBackup(void)
errdetail("Could not rename \"%s\" to \"%s\": %m.",
BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
}
+
+ /* if the tablespace_map file is not there, return */
+ if (stat(TABLESPACE_MAP, &stat_buf) < 0)
+ return;
+
+ /* remove leftover file from previously canceled backup if it exists */
+ unlink(TABLESPACE_MAP_OLD);
+
+ if (rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD) == 0)
+ {
+ ereport(LOG,
+ (errmsg("online backup mode canceled"),
+ errdetail("\"%s\" was renamed to \"%s\".",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
+ else
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("online backup mode was not canceled"),
+ errdetail("Could not rename \"%s\" to \"%s\": %m.",
+ TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+ }
}
/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2179bf7..329bb8c 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -51,6 +51,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
bool fast = PG_GETARG_BOOL(1);
char *backupidstr;
XLogRecPtr startpoint;
+ DIR *dir;
backupidstr = text_to_cstring(backupid);
@@ -59,7 +60,16 @@ pg_start_backup(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or replication role to run a backup")));
- startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL);
+ /* Make sure we can open the directory with tablespaces in it */
+ dir = AllocateDir("pg_tblspc");
+ if (!dir)
+ ereport(ERROR,
+ (errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
+
+ startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
+ dir, NULL, NULL, false, true);
+
+ FreeDir(dir);
PG_RETURN_LSN(startpoint);
}
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index de103c6..b341ff6 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -46,11 +46,12 @@ typedef struct
bool nowait;
bool includewal;
uint32 maxrate;
+ bool sendtblspcmapfile;
} basebackup_options;
-static int64 sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces);
-static int64 sendTablespace(char *path, bool sizeonly);
+static int64 sendDir(char *path, int basepathlen, bool sizeonly,
+ List *tablespaces, bool sendtblspclinks);
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
@@ -93,15 +94,6 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
-typedef struct
-{
- char *oid;
- char *path;
- char *rpath; /* relative path within PGDATA, or NULL */
- int64 size;
-} tablespaceinfo;
-
-
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -126,14 +118,18 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogRecPtr endptr;
TimeLineID endtli;
char *labelfile;
+ char *tblspc_map_file = NULL;
int datadirpathlen;
+ List *tablespaces = NIL;
datadirpathlen = strlen(DataDir);
backup_started_in_recovery = RecoveryInProgress();
startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
- &labelfile);
+ &labelfile, tblspcdir, &tablespaces,
+ &tblspc_map_file,
+ opt->progress, opt->sendtblspcmapfile);
/*
* Once do_pg_start_backup has been called, ensure that any failure causes
* us to abort the backup so we don't "leak" a backup counter. For this reason,
@@ -143,9 +139,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
- List *tablespaces = NIL;
ListCell *lc;
- struct dirent *de;
tablespaceinfo *ti;
SendXlogRecPtrResult(startptr, starttli);
@@ -162,70 +156,9 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
else
statrelpath = pgstat_stat_directory;
- /* Collect information about all tablespaces */
- while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
- {
- char fullpath[MAXPGPATH];
- char linkpath[MAXPGPATH];
- char *relpath = NULL;
- int rllen;
-
- /* Skip special stuff */
- if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
- continue;
-
- snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-#if defined(HAVE_READLINK) || defined(WIN32)
- rllen = readlink(fullpath, linkpath, sizeof(linkpath));
- if (rllen < 0)
- {
- ereport(WARNING,
- (errmsg("could not read symbolic link \"%s\": %m",
- fullpath)));
- continue;
- }
- else if (rllen >= sizeof(linkpath))
- {
- ereport(WARNING,
- (errmsg("symbolic link \"%s\" target is too long",
- fullpath)));
- continue;
- }
- linkpath[rllen] = '\0';
-
- /*
- * Relpath holds the relative path of the tablespace directory
- * when it's located within PGDATA, or NULL if it's located
- * elsewhere.
- */
- if (rllen > datadirpathlen &&
- strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
- IS_DIR_SEP(linkpath[datadirpathlen]))
- relpath = linkpath + datadirpathlen + 1;
-
- ti = palloc(sizeof(tablespaceinfo));
- ti->oid = pstrdup(de->d_name);
- ti->path = pstrdup(linkpath);
- ti->rpath = relpath ? pstrdup(relpath) : NULL;
- ti->size = opt->progress ? sendTablespace(fullpath, true) : -1;
- tablespaces = lappend(tablespaces, ti);
-#else
-
- /*
- * If the platform does not have symbolic links, it should not be
- * possible to have tablespaces - clearly somebody else created
- * them. Warn about it and ignore.
- */
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("tablespaces are not supported on this platform")));
-#endif
- }
-
/* Add a node for the base directory at the end */
ti = palloc0(sizeof(tablespaceinfo));
- ti->size = opt->progress ? sendDir(".", 1, true, tablespaces) : -1;
+ ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
tablespaces = lappend(tablespaces, ti);
/* Send tablespace header */
@@ -274,8 +207,17 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* In the main tar, include the backup_label first... */
sendFileWithContent(BACKUP_LABEL_FILE, labelfile);
- /* ... then the bulk of the files ... */
- sendDir(".", 1, false, tablespaces);
+ /*
+ * Send tablespace_map file if required and then the bulk of
+ * the files.
+ */
+ if (tblspc_map_file && opt->sendtblspcmapfile)
+ {
+ sendFileWithContent(TABLESPACE_MAP, tblspc_map_file);
+ sendDir(".", 1, false, tablespaces, false);
+ }
+ else
+ sendDir(".", 1, false, tablespaces, true);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -567,6 +509,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_tablespace_map = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -637,6 +580,15 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "tablespace_map") == 0)
+ {
+ if (o_tablespace_map)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->sendtblspcmapfile = true;
+ o_tablespace_map = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -865,7 +817,7 @@ sendFileWithContent(const char *filename, const char *content)
*
* Only used to send auxiliary tablespaces, not PGDATA.
*/
-static int64
+int64
sendTablespace(char *path, bool sizeonly)
{
int64 size;
@@ -899,7 +851,7 @@ sendTablespace(char *path, bool sizeonly)
size = 512; /* Size of the header just added */
/* Send all the files in the tablespace version directory */
- size += sendDir(pathbuf, strlen(path), sizeonly, NIL);
+ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
return size;
}
@@ -911,9 +863,14 @@ sendTablespace(char *path, bool sizeonly)
*
* Omit any directory in the tablespaces list, to avoid backing up
* tablespaces twice when they were created inside PGDATA.
+ *
+ * If sendtblspclinks is true, we need to include symlink
+ * information in the tar file. If not, we can skip that
+ * as it will be sent separately in the tablespace_map file.
*/
static int64
-sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
+sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
+ bool sendtblspclinks)
{
DIR *dir;
struct dirent *de;
@@ -941,13 +898,17 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
continue;
/*
- * If there's a backup_label file, it belongs to a backup started by
- * the user with pg_start_backup(). It is *not* correct for this
- * backup, our backup_label is injected into the tar separately.
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not*
+ * correct for this backup, our backup_label/tablespace_map is injected
+ * into the tar separately.
*/
if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
continue;
+ if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+ continue;
+
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -1120,8 +1081,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces)
break;
}
}
+
+ /*
+ * skip sending directories inside pg_tblspc, if not required.
+ */
+ if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
+ skip_this_dir = true;
+
if (!skip_this_dir)
- size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces);
+ size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
}
else if (S_ISREG(statbuf.st_mode))
{
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 7d6d154..6a5ecc5 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -71,6 +71,7 @@ Node *replication_parse_result;
%token K_NOWAIT
%token K_MAX_RATE
%token K_WAL
+%token K_TABLESPACE_MAP
%token K_TIMELINE
%token K_PHYSICAL
%token K_LOGICAL
@@ -119,7 +120,8 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * [MAX_RATE %d] [TABLESPACE_MAP]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -168,6 +170,11 @@ base_backup_opt:
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_TABLESPACE_MAP
+ {
+ $$ = makeDefElem("tablespace_map",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index f8acb66..056cc14 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -88,6 +88,7 @@ NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
+TABLESPACE_MAP { return K_TABLESPACE_MAP; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
CREATE_REPLICATION_SLOT { return K_CREATE_REPLICATION_SLOT; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 83bf2f5..2d0ea7b 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1652,13 +1652,14 @@ BaseBackup(void)
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
- psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
- maxrate_clause ? maxrate_clause : "");
+ maxrate_clause ? maxrate_clause : "",
+ format == 't' ? "TABLESPACE_MAP": "");
if (PQsendQuery(conn, basebkp) == 0)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f08b676..961e050 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -17,6 +17,8 @@
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
+#include "nodes/pg_list.h"
+#include "storage/fd.h"
/* Sync methods */
@@ -258,7 +260,9 @@ extern void assign_checkpoint_completion_target(double newval, void *extra);
* Starting/stopping a base backup
*/
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
- TimeLineID *starttli_p, char **labelfile);
+ TimeLineID *starttli_p, char **labelfile, DIR *tblspcdir,
+ List **tablespaces, char **tblspcmapfile, bool infotbssize,
+ bool needtblspcmapfile);
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
TimeLineID *stoptli_p);
extern void do_pg_abort_backup(void);
@@ -267,4 +271,7 @@ extern void do_pg_abort_backup(void);
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
+#define TABLESPACE_MAP "tablespace_map"
+#define TABLESPACE_MAP_OLD "tablespace_map.old"
+
#endif /* XLOG_H */
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 64f2bd5..7d3d09e 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -21,6 +21,16 @@
#define MAX_RATE_UPPER 1048576
+/*
+ * Information about one tablespace: built for each pg_tblspc entry when a
+ * base backup is started, and also used to carry the entries read back from
+ * the tablespace_map file (in which case only oid and path are set).
+ */
+typedef struct
+{
+ char *oid; /* name of the entry under pg_tblspc */
+ char *path; /* target of the symbolic link */
+ char *rpath; /* relative path within PGDATA, or NULL */
+ int64 size; /* size from sendTablespace(), or -1 if not requested */
+} tablespaceinfo;
+
extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern int64 sendTablespace(char *path, bool sizeonly);
+
#endif /* _BASEBACKUP_H */