New option in pg_basebackup to exclude pg_log files during base backup
Hi all,
Following the discussion in message id CAHGQGwFFMOr4EcugWHZpAaPYQbsEKDg66VmJ1rveJ6Z-EgaqAg@mail.gmail.com, I have developed a patch which gives the user an option to exclude the pg_log directory contents in pg_basebackup.
[Current situation]
During pg_basebackup, all files in pg_log directory will be copied to new backup directory.
[Design]
- Added a new optional switch "-S/--skip-log-dir" to pg_basebackup.
- If "skip-log-dir" is specified in the pg_basebackup command, then the base backup excludes log files from the standard "pg_log" directory and from any other directory specified in the Log_directory GUC variable. (An empty "pg_log"/$Log_directory folder will still be created.)
- If pg_log/$Log_directory is a symbolic link, then an empty folder will be created instead.
[Advantage]
It gives the user an option to avoid copying large log files if they don't wish to, and hence can save storage space.
Attached the patch.
Thanks & Regards,
Vaishnavi
Fujitsu Australia
Attachments:
pgbasebackup_excludes_pglog_v1.patch (application/octet-stream; name=pgbasebackup_excludes_pglog_v1.patch) Download
*** git/doc/src/sgml/ref/pg_basebackup.sgml Wed Apr 9 09:37:30 2014
--- Git_code/postgresql/doc/src/sgml/ref/pg_basebackup.sgml Wed Mar 26 11:36:16 2014
***************
*** 349,354 ****
--- 349,365 ----
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><option>-S</option></term>
+ <term><option>--skip-log-dir</option></term>
+ <listitem>
+ <para>
+ Excludes the log files in the backup. This will exclude all the logs in pg_log directory and any other
+ log directory specified via postgresql.conf file settings.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
<para>
*** git/src/backend/replication/basebackup.c Wed Apr 9 09:37:42 2014
--- Git_code/postgresql/src/backend/replication/basebackup.c Wed Apr 9 09:52:15 2014
***************
*** 36,42 ****
#include "utils/elog.h"
#include "utils/ps_status.h"
#include "utils/timestamp.h"
!
typedef struct
{
--- 36,42 ----
#include "utils/elog.h"
#include "utils/ps_status.h"
#include "utils/timestamp.h"
! extern PGDLLIMPORT char *Log_directory;
typedef struct
{
***************
*** 46,51 ****
--- 46,52 ----
bool nowait;
bool includewal;
uint32 maxrate;
+ bool skip_log_dir;
} basebackup_options;
***************
*** 70,75 ****
--- 71,77 ----
/* Relative path of temporary statistics directory */
static char *statrelpath = NULL;
+ static char *logrelpath = NULL;
/*
* Size of each block sent into the tar stream for larger files.
***************
*** 148,153 ****
--- 150,170 ----
else
statrelpath = pgstat_stat_directory;
+ if (opt->skip_log_dir)
+ {
+ if (is_absolute_path(Log_directory) &&
+ strncmp(Log_directory, DataDir, datadirpathlen) == 0)
+ {
+ logrelpath = psprintf("./%s", Log_directory + datadirpathlen + 1);
+ }
+ else if (strncmp(Log_directory, "./", 2) != 0)
+ {
+ logrelpath = psprintf("./%s", Log_directory);
+ }
+ else
+ logrelpath = Log_directory;
+ }
+
PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
{
List *tablespaces = NIL;
***************
*** 548,553 ****
--- 565,571 ----
bool o_nowait = false;
bool o_wal = false;
bool o_maxrate = false;
+ bool o_skiplogdir = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
***************
*** 618,623 ****
--- 636,650 ----
opt->maxrate = (uint32) maxrate;
o_maxrate = true;
}
+ else if (strcmp(defel->defname, "skip_log_dir") == 0)
+ {
+ if (o_skiplogdir)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ opt->skip_log_dir = true;
+ o_skiplogdir = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
***************
*** 984,989 ****
--- 1011,1040 ----
}
/*
+ * Skip log directory files only if skip_log_dir option specified in
+ * pg_basebackup. PG_LOG_DIRECTORY is also skipped. Create empty
+ * directory anyway.
+ */
+ if ((logrelpath != NULL) &&
+ ((strncmp(pathbuf, logrelpath, strlen(pathbuf)) == 0) || strncmp(de->d_name, PG_LOG_DIRECTORY, strlen(de->d_name)) == 0))
+ {
+ if (!sizeonly)
+ {
+ #ifndef WIN32
+ if (S_ISLNK(statbuf.st_mode))
+ #else
+ if (pgwin32_is_junction(pathbuf))
+ #endif
+ statbuf.st_mode = S_IFDIR | S_IRWXU;
+ _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
+ }
+ size += 512;
+ continue;
+ }
+
+
+
+ /*
* Skip pg_replslot, not useful to copy. But include it as an empty
* directory anyway, so we get permissions right.
*/
*** git/src/backend/replication/repl_gram.y Wed Apr 9 09:37:42 2014
--- Git_code/postgresql/src/backend/replication/repl_gram.y Wed Mar 26 13:43:04 2014
***************
*** 75,80 ****
--- 75,81 ----
%token K_PHYSICAL
%token K_LOGICAL
%token K_SLOT
+ %token K_SKIP_LOG_DIR
%type <node> command
%type <node> base_backup start_replication start_logical_replication create_replication_slot drop_replication_slot identify_system timeline_history
***************
*** 168,173 ****
--- 169,179 ----
$$ = makeDefElem("max_rate",
(Node *)makeInteger($2));
}
+ | K_SKIP_LOG_DIR
+ {
+ $$ = makeDefElem("skip_log_dir",
+ (Node *)makeInteger(TRUE));
+ }
;
create_replication_slot:
*** git/src/backend/replication/repl_scanner.l Wed Apr 9 09:37:42 2014
--- Git_code/postgresql/src/backend/replication/repl_scanner.l Wed Mar 26 13:43:40 2014
***************
*** 87,92 ****
--- 87,93 ----
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
MAX_RATE { return K_MAX_RATE; }
+ SKIP_LOG_DIR { return K_SKIP_LOG_DIR; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
*** git/src/backend/utils/misc/guc.c Wed Apr 9 09:37:59 2014
--- Git_code/postgresql/src/backend/utils/misc/guc.c Wed Mar 26 11:39:06 2014
***************
*** 61,66 ****
--- 61,67 ----
#include "replication/syncrep.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
+ #include "replication/basebackup.h"
#include "storage/bufmgr.h"
#include "storage/dsm_impl.h"
#include "storage/standby.h"
***************
*** 2985,2991 ****
GUC_SUPERUSER_ONLY
},
&Log_directory,
! "pg_log",
check_canonical_path, NULL, NULL
},
{
--- 2986,2992 ----
GUC_SUPERUSER_ONLY
},
&Log_directory,
! PG_LOG_DIRECTORY,
check_canonical_path, NULL, NULL
},
{
*** git/src/bin/pg_basebackup/pg_basebackup.c Wed Apr 9 09:37:43 2014
--- Git_code/postgresql/src/bin/pg_basebackup/pg_basebackup.c Wed Apr 9 09:55:40 2014
***************
*** 66,72 ****
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static pg_time_t last_progress_report = 0;
static int32 maxrate = 0; /* no limit by default */
!
/* Progress counters */
static uint64 totalsize;
--- 66,72 ----
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static pg_time_t last_progress_report = 0;
static int32 maxrate = 0; /* no limit by default */
! static bool skip_log_dir = false;
/* Progress counters */
static uint64 totalsize;
***************
*** 239,244 ****
--- 239,245 ----
printf(_(" --xlogdir=XLOGDIR location for the transaction log directory\n"));
printf(_(" -z, --gzip compress tar output\n"));
printf(_(" -Z, --compress=0-9 compress tar output with given compression level\n"));
+ printf(_(" -S, --skip-log-dir avoid backup of log files\n"));
printf(_("\nGeneral options:\n"));
printf(_(" -c, --checkpoint=fast|spread\n"
" set fast or spread checkpointing\n"));
***************
*** 1659,1671 ****
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
! psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
! maxrate_clause ? maxrate_clause : "");
if (PQsendQuery(conn, basebkp) == 0)
{
--- 1660,1673 ----
maxrate_clause = psprintf("MAX_RATE %u", maxrate);
basebkp =
! psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
includewal ? "NOWAIT" : "",
! maxrate_clause ? maxrate_clause : "",
! skip_log_dir ? "SKIP_LOG_DIR" : "");
if (PQsendQuery(conn, basebkp) == 0)
{
***************
*** 1964,1969 ****
--- 1966,1972 ----
{"status-interval", required_argument, NULL, 's'},
{"verbose", no_argument, NULL, 'v'},
{"progress", no_argument, NULL, 'P'},
+ {"skip-log-dir", no_argument, NULL, 'S'},
{"xlogdir", required_argument, NULL, 1},
{NULL, 0, NULL, 0}
};
***************
*** 1989,1995 ****
}
}
! while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
--- 1992,1998 ----
}
}
! while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:zZ:d:c:h:p:U:s:wWvPS",
long_options, &option_index)) != -1)
{
switch (c)
***************
*** 2122,2127 ****
--- 2125,2133 ----
case 'P':
showprogress = true;
break;
+ case 'S':
+ skip_log_dir = true;
+ break;
default:
/*
*** git/src/include/replication/basebackup.h Wed Apr 9 09:37:47 2014
--- Git_code/postgresql/src/include/replication/basebackup.h Wed Mar 26 13:45:13 2014
***************
*** 21,26 ****
--- 21,27 ----
#define MAX_RATE_UPPER 1048576
+ #define PG_LOG_DIRECTORY "pg_log"
extern void SendBaseBackup(BaseBackupCmd *cmd);
#endif /* _BASEBACKUP_H */
On Wed, Apr 9, 2014 at 2:06 AM, Prabakaran, Vaishnavi <
vaishnavip@fast.au.fujitsu.com> wrote:
Hi all,
Following the discussion in message id -
CAHGQGwFFMOr4EcugWHZpAaPYQbsEKDg66VmJ1rveJ6Z-EgaqAg@mail.gmail.com , I
have developed the patch which gives option to user to exclude pg_log
directory contents in pg_basebackup.
[Current situation]
During pg_basebackup, all files in pg_log directory will be copied to new
backup directory.
[Design]
- Added new non-mandatory option "-S/--skip-log-dir" to pg_basebackup .
- If "skip-log-dir" is specified in pg_basebackup command, then in
basebackup, exclude copying log files from standard "pg_log" directory and
any other directory specified in Log_directory guc variable. (Still empty
folder "pg_log"/$Log_directory will be created)
- In case, pg_log/$Log_directory is symbolic link, then an empty folder
will be created
[Advantage]
It gives an option to user to avoid copying of large log files if they
doesn't wish to and hence can save memory space.
While pg_log is definitely the most common one being the default on many
platforms, we'll still be missing other ones. Should we really hardcode it,
or should we somehow derive it from the settings for log_directory instead?
As a more general discussion, is this something we might want to expose as
a more general facility rather than hardcode it to the log directory only?
And is it perhaps something we'd rather have configured at the server than
specified in pg_basebackup - like a guc saying which directories should
always be excluded from a basebackup? So you don't have to remember it
every time?
--
Magnus Hagander
Me: http://www.hagander.net/
Work: http://www.redpill-linpro.com/
Magnus Hagander wrote:
While pg_log is definitely the most common one being the default on many
platforms, we'll still be missing other ones. Should we really hardcode it,
or should we somehow derive it from the settings for log_directory instead?
As a more general discussion, is this something we might want to expose as
a more general facility rather than hardcode it to the log directory only?
And is it perhaps something we'd rather have configured at the server than
specified in pg_basebackup - like a guc saying which directories should
always be excluded from a basebackup? So you don't have to remember it
every time?
So it'd be an array, and by default you'd have something like:
basebackup_skip_path = $log_directory
?
Maybe use it to skip backup labels by default as well.
basebackup_skip_path = $log_directory, $backup_label_files
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, Apr 9, 2014 at 4:55 PM, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:
Magnus Hagander wrote:
While pg_log is definitely the most common one being the default on many
platforms, we'll still be missing other ones. Should we really hardcode it,
or should we somehow derive it from the settings for log_directory
instead?
As a more general discussion, is this something we might want to expose
as
a more general facility rather than hardcode it to the log directory
only?
And is it perhaps something we'd rather have configured at the server
than
specified in pg_basebackup - like a guc saying which directories should
always be excluded from a basebackup? So you don't have to remember it
every time?
So it'd be an array, and by default you'd have something like:
basebackup_skip_path = $log_directory
?
Maybe use it to skip backup labels by default as well.
basebackup_skip_path = $log_directory, $backup_label_files
I hadn't considered any details, but yes, something along that line. And
then you could also include arbitrary filenames or directories should you
want. E.g. if you use the data directory to store your torrents or
something.
--
Magnus Hagander
Me: http://www.hagander.net/
Work: http://www.redpill-linpro.com/
Magnus Hagander wrote:
On Wed, Apr 9, 2014 at 4:55 PM, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:
So it'd be an array, and by default you'd have something like:
basebackup_skip_path = $log_directory
?
Maybe use it to skip backup labels by default as well.
basebackup_skip_path = $log_directory, $backup_label_files
I hadn't considered any details, but yes, something along that line. And
then you could also include arbitrary filenames or directories should you
want. E.g. if you use the data directory to store your torrents or
something.
Man, that's a great idea. Database servers have lots of diskspace in
that partition, so it should work really well. Thanks!
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thursday, Apr 10, 2014 at 1:15 AM, Álvaro Herrera wrote:
Magnus Hagander wrote:
On Wed, Apr 9, 2014 at 4:55 PM, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:
So it'd be an array, and by default you'd have something like:
basebackup_skip_path = $log_directory
?
Maybe use it to skip backup labels by default as well.
basebackup_skip_path = $log_directory, $backup_label_files
I hadn't considered any details, but yes, something along that line.
And then you could also include arbitrary filenames or directories
should you want. E.g. if you use the data directory to store your
torrents or something.
Man, that's a great idea. Database servers have lots of diskspace in that partition, so it should work really well. Thanks!
Yes, it sounds like a good idea. I will look into this and start working on it soon.
Thanks & Regards,
Vaishnavi
Fujitsu Australia
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 4/9/14, 10:57 AM, Magnus Hagander wrote:
So it'd be an array, and by default you'd have something like:
basebackup_skip_path = $log_directory
?
Maybe use it to skip backup labels by default as well.
basebackup_skip_path = $log_directory, $backup_label_files
I hadn't considered any details, but yes, something along that line. And
then you could also include arbitrary filenames or directories should
you want.
What are the use cases for excluding anything else?
pg_basebackup ought to have some intelligence about what files are
appropriate to include or exclude, depending on what the user is trying
to do. It shouldn't become a general file copying tool.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers