Backup throttling
Hello,
the purpose of this patch is to limit impact of pg_backup on running
server. Feedback is appreciated.
// Antonin Houska (Tony)
Attachments:
backup_throttling.patchtext/x-patch; name=backup_throttling.patchDownload
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index eb0c1d6..3b7ecfd 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,22 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of
+ <application>pg_basebackup</application> on a running master server.
+ </para>
+ <para>
+ Suffixes <literal>k</literal> (kilobytes) and <literal>m</literal>
+ (megabytes) are accepted. For example: <literal>10m</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index a1e12a8..7fb2d78 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -45,11 +45,23 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+double max_rate = 0; /* Maximum bytes per second. 0 implies
+ * unlimited rate. */
+
+#define MAX_RATE_LOWER 0x00008000 /* 32 kB */
+#define MAX_RATE_UPPER 0x40000000 /* 1 GB */
+/*
+ * We shouldn't measure time whenever a small piece of data (e.g. TAR file
+ * header) has arrived. That would introduce high CPU overhead.
+ */
+#define RATE_MIN_SAMPLE 32768
/* Progress counters */
static uint64 totalsize;
static uint64 totaldone;
+static uint64 last_measured;
static int tablespacecount;
+static int64 last_measured_time;
/* Pipe to communicate with background wal receiver process */
#ifndef WIN32
@@ -72,9 +84,14 @@ static volatile LONG has_xlogendptr = 0;
static PQExpBuffer recoveryconfcontents = NULL;
/* Function headers */
+
+
+
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static double parse_max_rate(char *src);
+static void enforce_max_rate();
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -110,6 +127,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate maximum transfer rate between server and client\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -473,6 +491,113 @@ progress_report(int tablespacenum, const char *filename)
fprintf(stderr, "\r");
}
+static double
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ double result;
+
+ result = strtod(src, &after_num);
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (tolower(*after_num))
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'm':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will make conversion to integer fail.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ result *= factor;
+ if (result < MAX_RATE_LOWER || result > MAX_RATE_UPPER)
+ {
+ fprintf(stderr, _("%s: transfer rate out of range \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ return result;
+}
+
+/*
+ * If the progress is more than what max_rate allows, sleep.
+ *
+ * Do not call if max_rate == 0.
+ */
+static void
+enforce_max_rate()
+{
+ int64 min_elapsed,
+ now,
+ elapsed,
+ to_sleep;
+
+ int64 last_chunk;
+
+ last_chunk = totaldone - last_measured;
+
+ /* The measurements shouldn't be more frequent then necessary. */
+ if (last_chunk < RATE_MIN_SAMPLE)
+ return;
+
+
+ now = localGetCurrentTimestamp();
+ elapsed = now - last_measured_time;
+
+ /*
+ * max_rate relates to seconds, thus the expression in brackets is
+ * milliseconds per byte.
+ */
+ min_elapsed = last_chunk * (USECS_PER_SEC / max_rate);
+
+ /*
+ * min_elapsed is the minimum time we must have spent to get here. If we
+ * spent less, let's wait.
+ */
+ to_sleep = min_elapsed - elapsed;
+ if (to_sleep > 0)
+ pg_usleep((long) to_sleep);
+
+ last_measured = totaldone;
+ last_measured_time = now + to_sleep;
+}
+
/*
* Write a piece of tar data
@@ -852,6 +977,8 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
totaldone += r;
if (showprogress)
progress_report(rownum, filename);
+ if (max_rate > 0)
+ enforce_max_rate();
} /* while (1) */
if (copybuf != NULL)
@@ -1075,6 +1202,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
totaldone += r;
if (showprogress)
progress_report(rownum, filename);
+ if (max_rate > 0)
+ enforce_max_rate();
current_len_left -= r;
if (current_len_left == 0 && current_padding == 0)
@@ -1446,7 +1575,7 @@ BaseBackup(void)
/*
* Sum up the total size, for progress reporting
*/
- totalsize = totaldone = 0;
+ totalsize = totaldone = last_measured = 0;
tablespacecount = PQntuples(res);
for (i = 0; i < PQntuples(res); i++)
{
@@ -1488,6 +1617,8 @@ BaseBackup(void)
/*
* Start receiving chunks
*/
+ last_measured_time = localGetCurrentTimestamp();
+
for (i = 0; i < PQntuples(res); i++)
{
if (format == 't')
@@ -1651,6 +1782,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1690,7 +1822,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1711,6 +1843,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ max_rate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index d56a4d7..f31300b 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -193,27 +193,6 @@ close_walfile(char *basedir, char *partial_suffix)
/*
- * Local version of GetCurrentTimestamp(), since we are not linked with
- * backend code. The protocol always uses integer timestamps, regardless of
- * server setting.
- */
-static int64
-localGetCurrentTimestamp(void)
-{
- int64 result;
- struct timeval tp;
-
- gettimeofday(&tp, NULL);
-
- result = (int64) tp.tv_sec -
- ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
-
- result = (result * USECS_PER_SEC) + tp.tv_usec;
-
- return result;
-}
-
-/*
* Local version of TimestampDifference(), since we are not linked with
* backend code.
*/
diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c
index 1dfb80f..ea71cc0 100644
--- a/src/bin/pg_basebackup/streamutil.c
+++ b/src/bin/pg_basebackup/streamutil.c
@@ -210,3 +210,23 @@ GetConnection(void)
return tmpconn;
}
}
+
+/*
+ * Local version of GetCurrentTimestamp().
+ * The protocol always uses integer timestamps, regardless of server setting.
+ */
+int64
+localGetCurrentTimestamp(void)
+{
+ int64 result;
+ struct timeval tp;
+
+ gettimeofday(&tp, NULL);
+
+ result = (int64) tp.tv_sec -
+ ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
+
+ result = (result * USECS_PER_SEC) + tp.tv_usec;
+
+ return result;
+}
diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h
index 77d6b86..2cdebde 100644
--- a/src/bin/pg_basebackup/streamutil.h
+++ b/src/bin/pg_basebackup/streamutil.h
@@ -1,4 +1,7 @@
#include "libpq-fe.h"
+#include "datatype/timestamp.h"
+
+#include <sys/time.h>
extern const char *progname;
extern char *connection_string;
@@ -17,3 +20,5 @@ extern PGconn *conn;
}
extern PGconn *GetConnection(void);
+
+extern int64 localGetCurrentTimestamp(void);
On Wed, Jul 24, 2013 at 4:20 PM, Antonin Houska
<antonin.houska@gmail.com> wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.
Interesting. Please add this patch to the next commitfeat.
https://commitfest.postgresql.org/action/commitfest_view?id=19
Regards,
--
Fujii Masao
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, 24 Jul 2013 09:20:52 +0200
Antonin Houska <antonin.houska@gmail.com> wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running
server. Feedback is appreciated.// Antonin Houska (Tony)
Hi,
That is a really nice feature. I took a first look at your patch and
some empty lines you added (e.g. line 60 your patch). Can you remove
them?
Why did you move localGetCurrentTimestamp() into streamutil.c? Is
sys/time.h still needed in receivelog.c after the move?
I will try your patch later today to see, if it works.
regards,
Stefan Radomski
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Gibheer escribi�:
Why did you move localGetCurrentTimestamp() into streamutil.c? Is
sys/time.h still needed in receivelog.c after the move?
I think we should have GetCurrentTimestamp() in src/common; there are
way too many copies of that functionality now. I looked into this
awhile back, but eviscerating the datetime.c/timestamp.c code out of the
backend proved much messier than I anticipated.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 07/31/2013 07:13 AM, Gibheer wrote:
Hi,
That is a really nice feature.
I don't pretend it's my idea, I just coded it. My boss proposed the
feature as such :-)
I took a first look at your patch and some empty lines you added (e.g. line 60 your patch). Can you remove
them?
Sure, will do in the next version.
Why did you move localGetCurrentTimestamp() into streamutil.c? Is sys/time.h still needed in receivelog.c after the move?
Because both receivelog.c and pg_basebackup.c need it now. I thought I
could move localTimestampDifference() and
localTimestampDifferenceExceeds() as well for the sake of consistency
(these are actually utilities too) but I didn't get convinced enough
that the feature alone justifies such a change.
As mentioned in
/messages/by-id/20130731173624.GX14652@eldon.alvh.no-ip.org
these functions ideally shouldn't have separate implementation at all.
However the problem is that pg_basebackup is not linked to the backend.
You're right about sys/time.h, it's included via via streamutil.h. I'll
fix that too.
I will try your patch later today to see, if it works.
Whenever you have time. Thanks!
// Tony
On Wed, 31 Jul 2013 22:50:19 +0200
Antonin Houska <antonin.houska@gmail.com> wrote:
On 07/31/2013 07:13 AM, Gibheer wrote:
Hi,
That is a really nice feature.
I don't pretend it's my idea, I just coded it. My boss proposed the
feature as such :-)I took a first look at your patch and some empty lines you added
(e.g. line 60 your patch). Can you remove them?Sure, will do in the next version.
Why did you move localGetCurrentTimestamp() into streamutil.c? Is
sys/time.h still needed in receivelog.c after the move?Because both receivelog.c and pg_basebackup.c need it now. I thought
I could move localTimestampDifference() and
localTimestampDifferenceExceeds() as well for the sake of consistency
(these are actually utilities too) but I didn't get convinced enough
that the feature alone justifies such a change.As mentioned in
/messages/by-id/20130731173624.GX14652@eldon.alvh.no-ip.org
these functions ideally shouldn't have separate implementation at
all. However the problem is that pg_basebackup is not linked to the
backend.You're right about sys/time.h, it's included via via streamutil.h.
I'll fix that too.I will try your patch later today to see, if it works.
Whenever you have time. Thanks!
// Tony
Hi,
I got around to test your patch and it works. I've build a small script
for others to test it easily. You can tell it with ROOTDIR and BASEDIR
environment variables where to look for the binaries and where to put
the test directories.
There is only one small thing I hit, namely the error message when the
limit is too small. It was like
transfer rate out of range '10k'
but it does not mention what the actual range is.
Maybe a message like
transfer rate of 10k is too small. Lower limit is 32k.
or
transfer rate has to be between 32k and 1GB, was 10k.
would be better as they mention what the actual limit is. That avoids
that people have to look up the limits in the manual.
We should also add the current limits to the documentation.
regards,
Stefan Radomski
Attachments:
2013-07-31 22:50 keltez�ssel, Antonin Houska �rta:
On 07/31/2013 07:13 AM, Gibheer wrote:
Hi,
That is a really nice feature.
I don't pretend it's my idea, I just coded it. My boss proposed the feature as such :-)
I took a first look at your patch and some empty lines you added (e.g. line 60 your patch). Can you remove
them?Sure, will do in the next version.
Why did you move localGetCurrentTimestamp() into streamutil.c? Is sys/time.h still needed in receivelog.c after the move?
Because both receivelog.c and pg_basebackup.c need it now. I thought I could move
localTimestampDifference() and localTimestampDifferenceExceeds() as well for the sake of
consistency (these are actually utilities too) but I didn't get convinced enough that
the feature alone justifies such a change.As mentioned in
/messages/by-id/20130731173624.GX14652@eldon.alvh.no-ip.org these
functions ideally shouldn't have separate implementation at all. However the problem is
that pg_basebackup is not linked to the backend.
Have you considered the src/common directory?
You're right about sys/time.h, it's included via via streamutil.h. I'll fix that too.
I will try your patch later today to see, if it works.
Whenever you have time. Thanks!
// Tony
Best regards,
Zolt�n B�sz�rm�nyi
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.
Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013-08-19 19:20 keltez�ssel, Andres Freund �rta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Greetings,
Andres Freund
Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.
Maybe throttling the walsender is not a good idea, it can lead
to DoS via disk space shortage.
Best regards,
Zolt�n B�sz�rm�nyi
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2013-08-19 20:15:51 +0200, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltezéssel, Andres Freund írta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.
Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.
Seems fine to me. Under the premise that the idea is decided to be
worthwile to be integrated. Which I am not yet convinced of.
Maybe throttling the walsender is not a good idea, it can lead
to DoS via disk space shortage.
Not in a measurably different way than receiver side throttling?
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013-08-19 21:11 keltez�ssel, Andres Freund �rta:
On 2013-08-19 20:15:51 +0200, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltez�ssel, Andres Freund �rta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.Seems fine to me. Under the premise that the idea is decided to be
worthwile to be integrated. Which I am not yet convinced of.Maybe throttling the walsender is not a good idea, it can lead
to DoS via disk space shortage.Not in a measurably different way than receiver side throttling?
No, but that's not what I meant.
START_BACKUP has to deal with big data but it finishes after some time.
But pg_receivexlog may sit there indefinitely...
Best regards,
Zolt�n B�sz�rm�nyi
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 19.08.2013 21:15, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltez�ssel, Andres Freund �rta:
Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.
Throttling in the client seems much better to me. TCP is designed to
handle a slow client.
Maybe throttling the walsender is not a good idea, it can lead
to DoS via disk space shortage.
If a client can initiate a backup and/or streaming replication, he can
already do much more damage than a DoS via out of disk space. And a
nothing stops even a non-privileged user from causing an out of disk
space situation anyway. IOW that's a non-issue.
- Heikki
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
2013-08-20 08:37 keltez�ssel, Heikki Linnakangas �rta:
On 19.08.2013 21:15, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltez�ssel, Andres Freund �rta:
Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.Throttling in the client seems much better to me. TCP is designed to handle a slow client.
Maybe throttling the walsender is not a good idea, it can lead
to DoS via disk space shortage.If a client can initiate a backup and/or streaming replication, he can already do much
more damage than a DoS via out of disk space. And a nothing stops even a non-privileged
user from causing an out of disk space situation anyway. IOW that's a non-issue.
I got to the same conclusion this morning, but because of wal_keep_segments.
Best regards,
Zolt�n B�sz�rm�nyi
- Heikki
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Aug 19, 2013, at 9:11 PM, Andres Freund wrote:
On 2013-08-19 20:15:51 +0200, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltezéssel, Andres Freund írta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.Seems fine to me. Under the premise that the idea is decided to be
worthwile to be integrated. Which I am not yet convinced of.
i think there is a lot of value for this one. the scenario we had a couple of times is pretty simple:
just assume a weak server - maybe just one disk or two - and a slave.
master and slave are connected via a 1 GB network.
pg_basebackup will fetch data full speed basically putting those lonely disks out of business.
we actually had a case where a client asked if "PostgreSQL is locked during base backup". of
course it was just disk wait caused by a full speed pg_basebackup.
regarding the client side implementation: we have chosen this way because it is less invasive.
i cannot see a reason to do this on the server side because we won't have 10
pg_basebackup-style tools making use of this feature anyway.
of course, if you got 20 disk and a 1 gbit network this is useless - but many people don't have that.
regards,
hans-jürgen schönig
--
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2013-08-21 08:10:42 +0200, PostgreSQL - Hans-Jürgen Schönig wrote:
On Aug 19, 2013, at 9:11 PM, Andres Freund wrote:
On 2013-08-19 20:15:51 +0200, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltezéssel, Andres Freund írta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.Seems fine to me. Under the premise that the idea is decided to be
worthwile to be integrated. Which I am not yet convinced of.i think there is a lot of value for this one. the scenario we had a couple of times is pretty simple:
just assume a weak server - maybe just one disk or two - and a slave.
master and slave are connected via a 1 GB network.
pg_basebackup will fetch data full speed basically putting those lonely disks out of business.
we actually had a case where a client asked if "PostgreSQL is locked during base backup". of
course it was just disk wait caused by a full speed pg_basebackup.
regarding the client side implementation: we have chosen this way because it is less invasive.
i cannot see a reason to do this on the server side because we won't have 10
pg_basebackup-style tools making use of this feature anyway.
The problem is that receiver side throttling over TCP doesn't always
work all that nicely unless you have a low rate of transfer and/or very
low latency . Quite often you will have OS buffers/the TCP Window being
filled in bursts where the sender sends at max capacity and then a
period where nothing happens on the sender. That's often not what you
want when you need to throttle.
Besides, I can see some value in e.g. normal streaming replication also
being rate limited...
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Aug 21, 2013, at 10:57 AM, Andres Freund wrote:
On 2013-08-21 08:10:42 +0200, PostgreSQL - Hans-Jürgen Schönig wrote:
On Aug 19, 2013, at 9:11 PM, Andres Freund wrote:
On 2013-08-19 20:15:51 +0200, Boszormenyi Zoltan wrote:
2013-08-19 19:20 keltezéssel, Andres Freund írta:
Hi,
On 2013-07-24 09:20:52 +0200, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running server.
Feedback is appreciated.Based on a quick look it seems like you're throttling on the receiving
side. Is that a good idea? Especially over longer latency links, TCP
buffering will reduce the effect on the sender side considerably.Throttling on the sender side requires extending the syntax of
BASE_BACKUP and maybe START_REPLICATION so both can be
throttled but throttling is still initiated by the receiver side.Seems fine to me. Under the premise that the idea is decided to be
worthwile to be integrated. Which I am not yet convinced of.i think there is a lot of value for this one. the scenario we had a couple of times is pretty simple:
just assume a weak server - maybe just one disk or two - and a slave.
master and slave are connected via a 1 GB network.
pg_basebackup will fetch data full speed basically putting those lonely disks out of business.
we actually had a case where a client asked if "PostgreSQL is locked during base backup". of
course it was just disk wait caused by a full speed pg_basebackup.regarding the client side implementation: we have chosen this way because it is less invasive.
i cannot see a reason to do this on the server side because we won't have 10
pg_basebackup-style tools making use of this feature anyway.The problem is that receiver side throttling over TCP doesn't always
work all that nicely unless you have a low rate of transfer and/or very
low latency . Quite often you will have OS buffers/the TCP Window being
filled in bursts where the sender sends at max capacity and then a
period where nothing happens on the sender. That's often not what you
want when you need to throttle.Besides, I can see some value in e.g. normal streaming replication also
being rate limited...
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
regards,
hans
--
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/22/2013 01:39 PM, PostgreSQL - Hans-J�rgen Sch�nig wrote:
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
I tend to agree. If anything we're likely to want the reverse - the
ability to throttle WAL *generation* on the master so streaming can keep up.
I see a lot of value in throttling base backup transfer rates. It's
something PgBarman does per-tablespace using rsync at the moment, but
it'd be nice if it available as an option possible over the streaming
replication protocol via pg_basebackup so it was easier for people to
use ad-hoc and without all the shell access wrangling.
--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2013-08-22 07:39:41 +0200, PostgreSQL - Hans-Jürgen Schönig wrote:
regarding the client side implementation: we have chosen this way because it is less invasive.
i cannot see a reason to do this on the server side because we won't have 10
pg_basebackup-style tools making use of this feature anyway.The problem is that receiver side throttling over TCP doesn't always
work all that nicely unless you have a low rate of transfer and/or very
low latency . Quite often you will have OS buffers/the TCP Window being
filled in bursts where the sender sends at max capacity and then a
period where nothing happens on the sender. That's often not what you
want when you need to throttle.Besides, I can see some value in e.g. normal streaming replication also
being rate limited...what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
It's not an unreasonable goal if you have several streaming replicas
with only some of them being synchronous replicas. Also, analytics
replicas that need to catchup don't really need priority over local
operations.
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Wed, 2013-07-24 at 09:20 +0200, Antonin Houska wrote:
the purpose of this patch is to limit impact of pg_backup on running
server. Feedback is appreciated.
Please replace the tabs in the SGML files with spaces.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/22/2013 03:33 PM, Craig Ringer wrote:
On 08/22/2013 01:39 PM, PostgreSQL - Hans-J�rgen Sch�nig wrote:
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
I tend to agree. If anything we're likely to want the reverse - the
ability to throttle WAL *generation* on the master so streaming can keep up.
(I assume you mean WAL *sending* rather than *generation*.)
I don't think we want to throttle WAL sending/receiving at all. The WAL
senders should always keep up with the amount of WAL generated. If the
rate of WAL sending is restricted, then the pg_basebackup (or another
client application) might never catch up.
Also, IMO, pg_basebackup starts at the current WAL segment. Thus the
unthrottled WAL transfer shouldn't cause significant additional load,
such as reading many old WAL segments from the master's disk.
I see a lot of value in throttling base backup transfer rates. It's
something PgBarman does per-tablespace using rsync at the moment, but
it'd be nice if it available as an option possible over the streaming
replication protocol via pg_basebackup so it was easier for people to
use ad-hoc and without all the shell access wrangling.
(Possibly huge) DATA directory (tablespaces, etc.) is the actual purpose
of this patch. This is the additional load that pg_basebackup imposes on
the master. As pointed out elsewhere in the thread, the client-side
throttling was chosen because it seemed to be less invasive. But the
discussion (including this your comment) keeps me no longer convinced
that it's the best way. I'll reconsider things.
// Antonin Houska (Tony)
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/26/2013 12:50 PM, Antonin Houska wrote:
On 08/22/2013 03:33 PM, Craig Ringer wrote:
On 08/22/2013 01:39 PM, PostgreSQL - Hans-J�rgen Sch�nig wrote:
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
I tend to agree. If anything we're likely to want the reverse - the
ability to throttle WAL *generation* on the master so streaming can keep up.(I assume you mean WAL *sending* rather than *generation*.)
I think he meant *generation*, that is making sure that no more
than X amount of WAL is generated in Y amount of time, thereby
making sure that you can stream less WAL without falling behind.
I don't think we want to throttle WAL sending/receiving at all. The WAL
senders should always keep up with the amount of WAL generated. If the
rate of WAL sending is restricted, then the pg_basebackup (or another
client application) might never catch up.
Yes, and this is exactly why restricting *generation* is needed.
Also, IMO, pg_basebackup starts at the current WAL segment. Thus the
unthrottled WAL transfer shouldn't cause significant additional load,
such as reading many old WAL segments from the master's disk.
The possible extra load happens if the *network* not because of disk.
Think of replication over - possibly slow - WAN.
Cheers
--
Hannu Krosing
PostgreSQL Consultant
Performance, Scalability and High Availability
2ndQuadrant Nordic O�
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/26/2013 08:15 PM, Hannu Krosing wrote:
On 08/26/2013 12:50 PM, Antonin Houska wrote:
On 08/22/2013 03:33 PM, Craig Ringer wrote:
On 08/22/2013 01:39 PM, PostgreSQL - Hans-J�rgen Sch�nig wrote:
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
I tend to agree. If anything we're likely to want the reverse - the
ability to throttle WAL *generation* on the master so streaming can keep up.(I assume you mean WAL *sending* rather than *generation*.)
I think he meant *generation*, that is making sure that no more
than X amount of WAL is generated in Y amount of time, thereby
making sure that you can stream less WAL without falling behind.
Exactly so.
Sometimes one doesn't want the latency of synchronous replication, but
still wants to set a limit on how far behind the standby can fall. It
might be for limiting data loss, for making sure a replica can keep up
sustainably, or just to make sure the replica never gets far enough
behind that the master discards WAL that it still requires.
For many people the idea of the replica(s) affecting the master is
horrifying. For others, missing a single transaction on a crash is
unthinkable. We handle both those pretty well, but the middle ground is
currently very hard to walk, and I think that's actually where many
people will want to be.
I'd certainly rather throttle the master (and send a loud, angry alert
to the admin via icinga/nagios) than have a replica fall too far behind
and need to be re-inited from scratch.
--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/26/2013 02:33 PM, Craig Ringer wrote:
On 08/26/2013 08:15 PM, Hannu Krosing wrote:
On 08/26/2013 12:50 PM, Antonin Houska wrote:
On 08/22/2013 03:33 PM, Craig Ringer wrote:
On 08/22/2013 01:39 PM, PostgreSQL - Hans-J�rgen Sch�nig wrote:
what would be a reasonable scenario where limiting streaming would make sense? i cannot think of any to be honest.
I tend to agree. If anything we're likely to want the reverse - the
ability to throttle WAL *generation* on the master so streaming can keep up.(I assume you mean WAL *sending* rather than *generation*.)
I think he meant *generation*, that is making sure that no more
than X amount of WAL is generated in Y amount of time, thereby
making sure that you can stream less WAL without falling behind.Exactly so.
Sometimes one doesn't want the latency of synchronous replication, but
still wants to set a limit on how far behind the standby can fall. It
might be for limiting data loss, for making sure a replica can keep up
sustainably, or just to make sure the replica never gets far enough
behind that the master discards WAL that it still requires.
I think the question that brought us here was whether new functionality
of pg_basebackup should be implemented on server side, so that other
client applications - including the Barman that you mentioned in the
previous mail - don't have to duplicate it. Ability to throttle (one
time) transfer of all the files that standby setup requires falls into
this category.
However what you stress now is control of the (continuous) WAL stream
and thus something that affects normal operation, rather than setup. I
still think the pg_basebackup does not have to throttle the WAL stream,
so this your request does not overlap with the current patch.
// Antonin Houska (Tony)
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 08/27/2013 01:56 AM, Antonin Houska wrote:
However what you stress now is control of the (continuous) WAL stream
and thus something that affects normal operation, rather than setup. I
still think the pg_basebackup does not have to throttle the WAL stream,
so this your request does not overlap with the current patch.
I totally agree; I was getting off topic for this thread, and it's not
relevant to the patch as it stands.
--
Craig Ringer http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Aug 20, 2013 at 2:37 AM, Heikki Linnakangas
<hlinnakangas@vmware.com> wrote:
Throttling in the client seems much better to me. TCP is designed to handle
a slow client.
Other people have already offered some good points in this area, but
let me just add one thought that I don't think has been mentioned yet.
We have a *general* need to be able to throttle server-side resource
utilization, particularly I/O. This is a problem not only for
pg_basebackup, but for COPY, CLUSTER, VACUUM, and even things like
UPDATE. Of all of those, the only one for which we currently have any
kind of a solution is VACUUM. Now, maybe pg_basebackup also needs its
own special-purpose solution, but I think we'd do well to consider a
general I/O rate-limiting strategy and then consider particular needs
in the light of that framework. In that context, server-side seems
better to me, because something like CLUSTER isn't going to produce
anything that the client can effectively limit.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Aug 27, 2013 at 12:58 PM, Robert Haas <robertmhaas@gmail.com> wrote:
On Tue, Aug 20, 2013 at 2:37 AM, Heikki Linnakangas
<hlinnakangas@vmware.com> wrote:Throttling in the client seems much better to me. TCP is designed to
handle
a slow client.
Other people have already offered some good points in this area, but
let me just add one thought that I don't think has been mentioned yet.
We have a *general* need to be able to throttle server-side resource
utilization, particularly I/O. This is a problem not only for
pg_basebackup, but for COPY, CLUSTER, VACUUM, and even things like
UPDATE. Of all of those, the only one for which we currently have any
kind of a solution is VACUUM. Now, maybe pg_basebackup also needs its
own special-purpose solution, but I think we'd do well to consider a
general I/O rate-limiting strategy and then consider particular needs
in the light of that framework. In that context, server-side seems
better to me, because something like CLUSTER isn't going to produce
anything that the client can effectively limit.
+1 it is very easy at the moment to for example run a manual vacuum
full/cluster against a big table and generate WAL so quickly that the hot
standby disconnects because it gets "too far behind".
On 08/27/2013 01:58 PM, Robert Haas wrote:
On Tue, Aug 20, 2013 at 2:37 AM, Heikki Linnakangas
<hlinnakangas@vmware.com> wrote:Throttling in the client seems much better to me. TCP is designed to handle
a slow client.Other people have already offered some good points in this area, but
let me just add one thought that I don't think has been mentioned yet.
We have a *general* need to be able to throttle server-side resource
utilization, particularly I/O. This is a problem not only for
pg_basebackup, but for COPY, CLUSTER, VACUUM, and even things like
UPDATE. Of all of those, the only one for which we currently have any
kind of a solution is VACUUM. Now, maybe pg_basebackup also needs its
own special-purpose solution, but I think we'd do well to consider a
general I/O rate-limiting strategy and then consider particular needs
in the light of that framework. In that context, server-side seems
better to me, because something like CLUSTER isn't going to produce
anything that the client can effectively limit.
In fact I already concluded that server-side control is better and even
started to implement it for the next version of the patch. However the
pg_basebackup is different from VACUUM, CLUSTER, etc. in that it
retrieves the data directly from file system, as opposed to buffers. So
there seems to be little room here for utilization of (future)
'throttling infrastructure'.
// Antonin Houska (Tony)
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 8/27/13 7:58 AM, Robert Haas wrote:
We have a *general* need to be able to throttle server-side resource
utilization, particularly I/O. This is a problem not only for
pg_basebackup, but for COPY, CLUSTER, VACUUM, and even things like
UPDATE. Of all of those, the only one for which we currently have any
kind of a solution is VACUUM.
I didn't mention it specifically, but I always presumed that the "Cost
limited statements RFC" proposal I floated:
/messages/by-id/519EA5FF.5040606@2ndQuadrant.com
(and am still working on) would handle the base backup case too.
pg_basebackup is just another client. Some sort of purpose made
solution for pg_basebackup alone may be useful, but I'd be shocked if
the sort of general mechanism I described there wasn't good enough to
handle many of the backup limiting cases too.
Also, once that and the block write counters I also sent an RFC out for
are in place, I have a plan for adding a server-wide throttling
mechanism. I want to extract a cluster wide read and write rate and put
a cluster wide limit on that whole thing. It seemed too hard to jump
into without these other two pieces of plumbing in place first.
--
Greg Smith 2ndQuadrant US greg@2ndQuadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 07/24/2013 09:20 AM, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running
server.
Attached is a new version. Server-side implementation this time.
Antonin Houska (Tony)
Attachments:
backup_throttling_v2.patchtext/x-patch; name=backup_throttling_v2.patchDownload
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index eb0c1d6..f58eb60 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,22 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of <application>pg_basebackup</application>
+ on a running master server while transfering data directory.
+ </para>
+ <para>
+ Suffixes <literal>k</literal> (kilobytes) and <literal>m</literal>
+ (megabytes) are accepted. For example: <literal>10m</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ba8d173..08e4f26 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +43,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +61,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +71,41 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+/*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+#define THROTTLING_SAMPLE_MIN 32768
+
+/* The maximum number of checks per second. */
+#define THROTTLING_MAX_FREQUENCY 256
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
+
typedef struct
{
char *oid;
@@ -171,6 +209,31 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per seconds. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ /* Disable throttling. */
+ throttling_counter = -1;
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -468,6 +531,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -519,6 +583,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1019,6 +1106,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1040,10 +1128,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1069,3 +1161,47 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep.
+ */
+ pg_usleep((long) sleep);
+ else
+
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+
+ /*
+ * Only the whole multiples of throttling_sample processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 8c83780..1c2c31c 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 3d930f1..b2d5e3b 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 94b2a36..056b234 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1288,7 +1288,7 @@ GetCurrentTimestamp(void)
/*
* GetCurrentIntegerTimestamp -- get the current operating system time as int64
*
- * Result is the number of milliseconds since the Postgres epoch. If compiled
+ * Result is the number of microseconds since the Postgres epoch. If compiled
* with --enable-integer-datetimes, this is identical to GetCurrentTimestamp(),
* and is implemented as a macro.
*/
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index a1e12a8..c1506b9 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -45,6 +45,7 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+uint32 maxrate = 0; /* No limit by default. */
/* Progress counters */
static uint64 totalsize;
@@ -75,6 +76,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -110,6 +112,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -474,6 +477,96 @@ progress_report(int tablespacenum, const char *filename)
}
+static uint32
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ int64 result;
+ int errno_copy;
+
+ result = strtol(src, &after_num, 0);
+ errno_copy = errno;
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: transfer rate %s is not a valid integer value\n"), progname, src);
+ exit(1);
+ }
+
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (tolower(*after_num))
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'g':
+ factor <<= 10;
+ case 'm':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will make conversion to integer fail.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ /* Some checks only make sense when we know than the suffix is correct. */
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range
+ * to be done on server.
+ */
+ fprintf(stderr, _("%s: transfer must be greater than zero\n"), progname);
+ exit(1);
+ }
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate %s exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+
+ if (factor > 1)
+ {
+ result *= factor;
+ /* Check the integer range once again. */
+ if (result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate %s exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+ }
+ return (uint32) result;
+}
+
+
/*
* Write a piece of tar data
*/
@@ -1304,6 +1397,7 @@ BaseBackup(void)
uint32 starttli;
char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
+ char maxrate_clause[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
@@ -1376,13 +1470,18 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
+ if (maxrate > 0)
+ snprintf(maxrate_clause, sizeof(maxrate_clause), "MAX_RATE %u", maxrate);
+ else
+ maxrate_clause[0] = '\0';
snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
+ "BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
+ includewal ? "NOWAIT" : "",
+ maxrate_clause);
if (PQsendQuery(conn, current_path) == 0)
{
@@ -1651,6 +1750,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1690,7 +1790,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1711,6 +1811,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Hi,
On 2013-09-03 14:35:18 +0200, Antonin Houska wrote:
+ /* + * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the + * longest possible time to sleep. + */ + pg_usleep((long) sleep); + else + + /* + * The actual transfer rate is below the limit. Negative value would + * distort the adjustment of throttled_last. + */ + sleep = 0; + + /* + * Only the whole multiples of throttling_sample processed. The rest will + * be done during the next call of this function. + */ + throttling_counter %= throttling_sample; + /* Once the (possible) sleep ends, new period starts. */ + throttled_last += elapsed + sleep; +}
It's probably better to use latches for the waiting, those have properly
defined interruption semantics. Whether pg_usleep will be interrupted is
platform dependant...
Greetings,
Andres Freund
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Antonin Houska wrote:
+ <para> + Suffixes <literal>k</literal> (kilobytes) and <literal>m</literal> + (megabytes) are accepted. For example: <literal>10m</literal> + </para>
"m" is for meters, or milli. Please use "M" here.
+static uint32 +parse_max_rate(char *src) +{ + int factor; + char *after_num; + int64 result; + int errno_copy; + + result = strtol(src, &after_num, 0); + errno_copy = errno; + if (src == after_num) + { + fprintf(stderr, _("%s: transfer rate %s is not a valid integer value\n"), progname, src); + exit(1); + }
Please add quotes to the invalid value.
+ + /* + * Evaluate (optional) suffix. + * + * after_num should now be right behind the numeric value. + */ + factor = 1; + switch (tolower(*after_num)) + { + /* + * Only the following suffixes are allowed. It's not too useful to + * restrict the rate to gigabytes: such a rate will probably bring + * significant impact on the master anyway, so the throttling + * won't help much. + */ + case 'g': + factor <<= 10;
I don't understand why you allow a 'g' here, given the comment above ...
but in any case it should be G.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 09/03/2013 06:56 PM, Alvaro Herrera wrote:
+ /* + * Only the following suffixes are allowed. It's not too useful to + * restrict the rate to gigabytes: such a rate will probably bring + * significant impact on the master anyway, so the throttling + * won't help much. + */ + case 'g': + factor <<= 10;I don't understand why you allow a 'g' here, given the comment above ...
but in any case it should be G.
This reflects my hesitation whether GB should be accepted as a unit or
not. I'll probably remove this suffix.
(Will fix the other findings too,)
Thanks,
Tony
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 2013-09-03 12:56:53 -0400, Alvaro Herrera wrote:
Antonin Houska wrote:
+ <para> + Suffixes <literal>k</literal> (kilobytes) and <literal>m</literal> + (megabytes) are accepted. For example: <literal>10m</literal> + </para>"m" is for meters, or milli. Please use "M" here.
Shouldn't it be MB? Consistent with GUCs?
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Sep 3, 2013 at 8:35 AM, Antonin Houska <antonin.houska@gmail.com> wrote:
On 07/24/2013 09:20 AM, Antonin Houska wrote:
Hello,
the purpose of this patch is to limit impact of pg_backup on running
server.Attached is a new version. Server-side implementation this time.
Antonin Houska (Tony)
There seem to be several review comments made since you posted this
version. I'll mark this Waiting on Author in the CommitFest
application, since it seems that the patch needs to be further
updated.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 10/09/2013 08:56 PM, Robert Haas wrote:
There seem to be several review comments made since you posted this
version. I'll mark this Waiting on Author in the CommitFest
application, since it seems that the patch needs to be further
updated.
Since it was waiting for reviewer, I was not sure whether I should wait
for his findings and fix everything in a single transaction.
Nevertheless, the next version is attached here.
It fixes findings reported in
/messages/by-id/20130903165652.GC5227@eldon.alvh.no-ip.org
As for units
/messages/by-id/20130903224127.GD7018@awork2.anarazel.de
I'm not convinced about "MB" at the moment. Unfortunately I couldn't
find any other command-line PG utility requiring amount of data as an
option. Thus I use single-letter suffix, just as wget does for the same
purposes.
As for this
/messages/by-id/20130903125155.GA18486@awork2.anarazel.de
there eventually seems to be a consensus (I notice now the discussion
was off-list):
On 2013-09-03 23:21:57 +0200, Antonin Houska wrote:
On 09/03/2013 02:51 PM, Andres Freund wrote:
It's probably better to use latches for the waiting, those have properly
defined interruption semantics. Whether pg_usleep will be interrupted is
platform dependant...I noticed a comment about interruptions around the definition of
pg_usleep() function, but concluded that the sleeps are rather frequent
in this applications (typically in the order of tens to hundreds per
second, although the maximum of 256 might need to be decreased).
Therefore occasional interruptions shouldn't distort the actual rate
much. I'll think about it again. Thanks,The issue is rather that you might not get woken up when you want to
be. Signal handlers in postgres tend to do a
SetLatch(&MyProc->procLatch); which then will interrupt sleeps done via
WaitLatch(). It's probably not that important with the duration of the
sleeps you have.Greetings,
Andres Freund
// Antonin Houska (Tony)
Attachments:
backup_throttling_v3.patchtext/x-patch; name=backup_throttling_v3.patchDownload
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index eb0c1d6..882a26b 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,22 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of <application>pg_basebackup</application>
+ on a running master server while transfering data directory.
+ </para>
+ <para>
+ Suffixes <literal>k</literal> (kilobytes) and <literal>M</literal>
+ (megabytes) are accepted. For example: <literal>10M</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ba8d173..b2f10c3 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +43,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +61,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +71,41 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+/*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+#define THROTTLING_SAMPLE_MIN 32768
+
+/* The maximum number of checks per second. */
+#define THROTTLING_MAX_FREQUENCY 128
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
+
typedef struct
{
char *oid;
@@ -171,6 +209,31 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per seconds. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ /* Disable throttling. */
+ throttling_counter = -1;
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -468,6 +531,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -519,6 +583,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1019,6 +1106,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1040,10 +1128,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1069,3 +1161,47 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ pg_usleep((long) sleep);
+ else
+
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+
+ /*
+ * Only the whole multiples of throttling_sample processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 8c83780..1c2c31c 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 3d930f1..b2d5e3b 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 94b2a36..056b234 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1288,7 +1288,7 @@ GetCurrentTimestamp(void)
/*
* GetCurrentIntegerTimestamp -- get the current operating system time as int64
*
- * Result is the number of milliseconds since the Postgres epoch. If compiled
+ * Result is the number of microseconds since the Postgres epoch. If compiled
* with --enable-integer-datetimes, this is identical to GetCurrentTimestamp(),
* and is implemented as a macro.
*/
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index a1e12a8..5e0b361 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -45,6 +45,7 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+uint32 maxrate = 0; /* No limit by default. */
/* Progress counters */
static uint64 totalsize;
@@ -75,6 +76,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -110,6 +112,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -474,6 +477,94 @@ progress_report(int tablespacenum, const char *filename)
}
+static uint32
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ int64 result;
+ int errno_copy;
+
+ result = strtol(src, &after_num, 0);
+ errno_copy = errno;
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" is not a valid integer value\n"), progname, src);
+ exit(1);
+ }
+
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (*after_num)
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'M':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will make conversion to integer fail.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ /* Some checks only make sense when we know than the suffix is correct. */
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer must be greater than zero\n"), progname);
+ exit(1);
+ }
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+
+ if (factor > 1)
+ {
+ result *= factor;
+ /* Check the integer range once again. */
+ if (result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+ }
+ return (uint32) result;
+}
+
+
/*
* Write a piece of tar data
*/
@@ -1304,6 +1395,7 @@ BaseBackup(void)
uint32 starttli;
char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
+ char maxrate_clause[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
@@ -1376,13 +1468,18 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
+ if (maxrate > 0)
+ snprintf(maxrate_clause, sizeof(maxrate_clause), "MAX_RATE %u", maxrate);
+ else
+ maxrate_clause[0] = '\0';
snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
+ "BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
+ includewal ? "NOWAIT" : "",
+ maxrate_clause);
if (PQsendQuery(conn, current_path) == 0)
{
@@ -1651,6 +1748,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1690,7 +1788,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1711,6 +1809,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Hi,
I am reviewing your patch.
2013-10-10 15:32 keltez�ssel, Antonin Houska �rta:
On 10/09/2013 08:56 PM, Robert Haas wrote:
There seem to be several review comments made since you posted this
version. I'll mark this Waiting on Author in the CommitFest
application, since it seems that the patch needs to be further
updated.Since it was waiting for reviewer, I was not sure whether I should wait
for his findings and fix everything in a single transaction.
Nevertheless, the next version is attached here.It fixes findings reported in
/messages/by-id/20130903165652.GC5227@eldon.alvh.no-ip.orgAs for units
/messages/by-id/20130903224127.GD7018@awork2.anarazel.de
I'm not convinced about "MB" at the moment. Unfortunately I couldn't
find any other command-line PG utility requiring amount of data as an
option. Thus I use single-letter suffix, just as wget does for the same
purposes.As for this
/messages/by-id/20130903125155.GA18486@awork2.anarazel.de
there eventually seems to be a consensus (I notice now the discussion
was off-list):On 2013-09-03 23:21:57 +0200, Antonin Houska wrote:
On 09/03/2013 02:51 PM, Andres Freund wrote:
It's probably better to use latches for the waiting, those have properly
defined interruption semantics. Whether pg_usleep will be interrupted is
platform dependant...I noticed a comment about interruptions around the definition of
pg_usleep() function, but concluded that the sleeps are rather frequent
in this applications (typically in the order of tens to hundreds per
second, although the maximum of 256 might need to be decreased).
Therefore occasional interruptions shouldn't distort the actual rate
much. I'll think about it again. Thanks,The issue is rather that you might not get woken up when you want to
be. Signal handlers in postgres tend to do a
SetLatch(&MyProc->procLatch); which then will interrupt sleeps done via
WaitLatch(). It's probably not that important with the duration of the
sleeps you have.Greetings,
Andres Freund
// Antonin Houska (Tony)
* Is the patch in a patch format which has context?
Yes
* Does it apply cleanly to the current git master?
It applies with some offsets. Version "3a" that applies cleanly is attached.
* Does it include reasonable tests, necessary doc patches, etc?
Docs: yes. Tests: N/A
* Does the patch actually implement what it advertises?
Yes.
* Do we want that?
Yes.
* Do we already have it?
No.
* Does it follow SQL spec, or the community-agreed behavior?
No such SQL spec. The latest patch fixed all previously raised comments.
* Does it include pg_dump support (if applicable)?
N/A
* Are there dangers?
Yes, the "danger" of slowing down taking a base backup.
But this is the point.
* Have all the bases been covered?
Yes.
* Does the feature work as advertised?
Yes.
* Are there corner cases the author has failed to consider?
No.
* Are there any assertion failures or crashes?
No.
* Does the patch slow down simple tests?
No.
* If it claims to improve performance, does it?
N/A
* Does it slow down other things?
No.
* Does it follow the project coding guidelines?
Yes. A nitpicking: this else branch below might need brackets
because there is also a comment in that branch:
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ /* Disable throttling. */
+ throttling_counter = -1;
+
* Are there portability issues?
No.
* Will it work on Windows/BSD etc?
It should, there are no dangerous calls besides the above mentioned
pg_usleep(). But waking up from pg_usleep() earlier makes rate limiting
fluctuate slightly, not fail.
* Are the comments sufficient and accurate?
Yes.
* Does it do what it says, correctly?
Yes.
Although it should be mentioned in the docs that rate limiting
applies to walsenders individually, not globally. I tried this
on a freshly created database:
$ time pg_basebackup -D data2 -r 1M -X stream -h 127.0.0.1
real 0m26.508s
user 0m0.054s
sys 0m0.360s
The source database had 3 WAL files in pg_xlog, one of them was
also streamed. The final size of "data2" was 43MB or 26MB without pg_xlog,
i.e. without the "-X stream" option. The backup used 2 walsenders
in parallel (one for WAL) which is a known feature.
* Does it produce compiler warnings?
No.
*Can you make it crash?
No.
Consider the changes to the code in the context of the project as a whole:
* Is everything done in a way that fits together coherently with other features/modules?
Yes.
* Are there interdependencies that can cause problems?
No.
Another note. This chunk should be submitted separately as a comment bugfix:
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index c3c71b7..5736fd8 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1288,7 +1288,7 @@ GetCurrentTimestamp(void)
/*
* GetCurrentIntegerTimestamp -- get the current operating system time as int64
*
- * Result is the number of milliseconds since the Postgres epoch. If compiled
+ * Result is the number of microseconds since the Postgres epoch. If compiled
* with --enable-integer-datetimes, this is identical to GetCurrentTimestamp(),
* and is implemented as a macro.
*/
Best regards,
Zolt�n B�sz�rm�nyi
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
Attachments:
backup_throttling_v3a.patchtext/x-patch; name=backup_throttling_v3a.patchDownload
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index c379df5..73bb999 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,22 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of <application>pg_basebackup</application>
+ on a running master server while transfering data directory.
+ </para>
+ <para>
+ Suffixes <literal>k</literal> (kilobytes) and <literal>M</literal>
+ (megabytes) are accepted. For example: <literal>10M</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ba8d173..b2f10c3 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +43,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +61,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +71,41 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+/*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+#define THROTTLING_SAMPLE_MIN 32768
+
+/* The maximum number of checks per second. */
+#define THROTTLING_MAX_FREQUENCY 128
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
+
typedef struct
{
char *oid;
@@ -171,6 +209,31 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per seconds. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ /* Disable throttling. */
+ throttling_counter = -1;
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -468,6 +531,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -519,6 +583,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1019,6 +1106,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1040,10 +1128,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1069,3 +1161,47 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ pg_usleep((long) sleep);
+ else
+
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+
+ /*
+ * Only the whole multiples of throttling_sample processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 8c83780..1c2c31c 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 3d930f1..b2d5e3b 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index c3c71b7..5736fd8 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1288,7 +1288,7 @@ GetCurrentTimestamp(void)
/*
* GetCurrentIntegerTimestamp -- get the current operating system time as int64
*
- * Result is the number of milliseconds since the Postgres epoch. If compiled
+ * Result is the number of microseconds since the Postgres epoch. If compiled
* with --enable-integer-datetimes, this is identical to GetCurrentTimestamp(),
* and is implemented as a macro.
*/
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 6706c0c..7a86cd7 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -46,6 +46,7 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+uint32 maxrate = 0; /* No limit by default. */
/* Progress counters */
static uint64 totalsize;
@@ -76,6 +77,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -111,6 +113,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -476,6 +479,94 @@ progress_report(int tablespacenum, const char *filename)
}
+static uint32
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ int64 result;
+ int errno_copy;
+
+ result = strtol(src, &after_num, 0);
+ errno_copy = errno;
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" is not a valid integer value\n"), progname, src);
+ exit(1);
+ }
+
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (*after_num)
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'M':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will make conversion to integer fail.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ /* Some checks only make sense when we know than the suffix is correct. */
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer must be greater than zero\n"), progname);
+ exit(1);
+ }
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+
+ if (factor > 1)
+ {
+ result *= factor;
+ /* Check the integer range once again. */
+ if (result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+ }
+ return (uint32) result;
+}
+
+
/*
* Write a piece of tar data
*/
@@ -1310,6 +1401,7 @@ BaseBackup(void)
uint32 starttli;
char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
+ char maxrate_clause[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
@@ -1382,13 +1474,18 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
+ if (maxrate > 0)
+ snprintf(maxrate_clause, sizeof(maxrate_clause), "MAX_RATE %u", maxrate);
+ else
+ maxrate_clause[0] = '\0';
snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
+ "BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
+ includewal ? "NOWAIT" : "",
+ maxrate_clause);
if (PQsendQuery(conn, current_path) == 0)
{
@@ -1657,6 +1754,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1697,7 +1795,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1718,6 +1816,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
On 12/02/2013 02:23 PM, Boszormenyi Zoltan wrote:
Hi,
I am reviewing your patch.
Thanks. New version attached.
* Does it follow the project coding guidelines?
Yes. A nitpicking: this else branch below might need brackets
because there is also a comment in that branch:+ /* The 'real data' starts now (header was ignored). */ + throttled_last = GetCurrentIntegerTimestamp(); + } + else + /* Disable throttling. */ + throttling_counter = -1; +
Done.
* Does it do what it says, correctly?
Yes.
Although it should be mentioned in the docs that rate limiting
applies to walsenders individually, not globally. I tried this
on a freshly created database:$ time pg_basebackup -D data2 -r 1M -X stream -h 127.0.0.1
real 0m26.508s
user 0m0.054s
sys 0m0.360sThe source database had 3 WAL files in pg_xlog, one of them was
also streamed. The final size of "data2" was 43MB or 26MB without pg_xlog,
i.e. without the "-X stream" option. The backup used 2 walsenders
in parallel (one for WAL) which is a known feature.
Right, if the method is 'stream', a separate WAL sender is used and the
transfer is not limited: client must keep up with the stream
unconditionally. I added a note to documentation.
But there's no reason not to throttle WAL files if the method is
'fetch'. It's fixed now.
Another note. This chunk should be submitted separately as a comment bugfix:
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index c3c71b7..5736fd8 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -1288,7 +1288,7 @@ GetCurrentTimestamp(void) /* * GetCurrentIntegerTimestamp -- get the current operating system time as int64 * - * Result is the number of milliseconds since the Postgres epoch. If compiled + * Result is the number of microseconds since the Postgres epoch. If compiled * with --enable-integer-datetimes, this is identical to GetCurrentTimestamp(), * and is implemented as a macro. */
Will do.
// Tony
Attachments:
backup_throttling_v4.patchtext/x-patch; name=backup_throttling_v4.patchDownload
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index c379df5..e878592 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,26 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of <application>pg_basebackup</application>
+ on a running master server.
+ </para>
+ <para>
+ This option always affects transfer of the data directory. Transfer of
+ WAL files is only affected if the collection method is <literal>fetch</literal>.
+ </para>
+ <para>
+ Suffixes <literal>k</literal> (kilobytes) and <literal>M</literal>
+ (megabytes) are accepted. For example: <literal>10M</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ba8d173..f194746 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +43,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +61,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +71,41 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+/*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+#define THROTTLING_SAMPLE_MIN 32768
+
+/* The maximum number of checks per second. */
+#define THROTTLING_MAX_FREQUENCY 128
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
+
typedef struct
{
char *oid;
@@ -171,6 +209,33 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per seconds. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ {
+ /* Disable throttling. */
+ throttling_counter = -1;
+ }
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -398,6 +463,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
+
if (len == XLogSegSize)
break;
}
@@ -468,6 +535,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -519,6 +587,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1019,6 +1110,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1040,10 +1132,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1069,3 +1165,51 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+ {
+
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ pg_usleep((long) sleep);
+ }
+ else
+ {
+
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+ }
+
+ /*
+ * Only the whole multiples of throttling_sample processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 8c83780..1c2c31c 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 3d930f1..b2d5e3b 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 6706c0c..7a86cd7 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -46,6 +46,7 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+uint32 maxrate = 0; /* No limit by default. */
/* Progress counters */
static uint64 totalsize;
@@ -76,6 +77,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -111,6 +113,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -476,6 +479,94 @@ progress_report(int tablespacenum, const char *filename)
}
+static uint32
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ int64 result;
+ int errno_copy;
+
+ result = strtol(src, &after_num, 0);
+ errno_copy = errno;
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" is not a valid integer value\n"), progname, src);
+ exit(1);
+ }
+
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (*after_num)
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'M':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will make conversion to integer fail.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: invalid transfer rate \"%s\"\n"), progname, src);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ /* Some checks only make sense when we know than the suffix is correct. */
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer must be greater than zero\n"), progname);
+ exit(1);
+ }
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+
+ if (factor > 1)
+ {
+ result *= factor;
+ /* Check the integer range once again. */
+ if (result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+ }
+ return (uint32) result;
+}
+
+
/*
* Write a piece of tar data
*/
@@ -1310,6 +1401,7 @@ BaseBackup(void)
uint32 starttli;
char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
+ char maxrate_clause[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
@@ -1382,13 +1474,18 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
+ if (maxrate > 0)
+ snprintf(maxrate_clause, sizeof(maxrate_clause), "MAX_RATE %u", maxrate);
+ else
+ maxrate_clause[0] = '\0';
snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
+ "BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
+ includewal ? "NOWAIT" : "",
+ maxrate_clause);
if (PQsendQuery(conn, current_path) == 0)
{
@@ -1657,6 +1754,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1697,7 +1795,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1718,6 +1816,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Hi,
2013-12-05 15:36 keltez�ssel, Antonin Houska �rta:
On 12/02/2013 02:23 PM, Boszormenyi Zoltan wrote:
Hi,
I am reviewing your patch.
Thanks. New version attached.
I have reviewed and tested it and marked it as ready for committer.
Best regards,
Zolt�n B�sz�rm�nyi
--
----------------------------------
Zolt�n B�sz�rm�nyi
Cybertec Sch�nig & Sch�nig GmbH
Gr�hrm�hlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Dec 6, 2013 at 6:43 PM, Boszormenyi Zoltan <zb@cybertec.at> wrote:
Hi,
2013-12-05 15:36 keltezéssel, Antonin Houska írta:
On 12/02/2013 02:23 PM, Boszormenyi Zoltan wrote:
Hi,
I am reviewing your patch.
Thanks. New version attached.
I have reviewed and tested it and marked it as ready for committer.
Here are the review comments:
+ <term><option>-r</option></term>
+ <term><option>--max-rate</option></term>
You need to add something like <replaceable
class="parameter">rate</replaceable>.
+ The purpose is to limit impact of
<application>pg_basebackup</application>
+ on a running master server.
s/"master server"/"server" because we can take a backup from also the standby.
I think that it's better to document the default value and the accepted range of
the rate that we can specify.
You need to change the protocol.sgml because you changed BASE_BACKUP
replication command.
+ printf(_(" -r, --max-rate maximum transfer rate to
transfer data directory\n"));
You need to add something like =RATE just after --max-rate.
+ result = strtol(src, &after_num, 0);
errno should be set to 0 just before calling strtol().
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer
range\n"), progname, src);
+ exit(1);
+ }
We can move this check after the check of "src == after_num" like
parse_int() in guc.c does.
If we do this, the local variable 'errno_copy' is no longer necessary.
I think that it's better to output the hint message like "Valid units for
the transfer rate are \"k\" and \"M\"." when a user specified wrong unit.
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ pg_usleep((long) sleep);
It's better to use the latch here so that we can interrupt immediately.
Regards,
--
Fujii Masao
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Thanks for checking. The new version addresses your findings.
// Antonin Houska (Tony)
Show quoted text
On 12/09/2013 03:49 PM, Fujii Masao wrote:
On Fri, Dec 6, 2013 at 6:43 PM, Boszormenyi Zoltan <zb@cybertec.at> wrote:
Hi,
2013-12-05 15:36 keltez�ssel, Antonin Houska �rta:
On 12/02/2013 02:23 PM, Boszormenyi Zoltan wrote:
Hi,
I am reviewing your patch.
Thanks. New version attached.
I have reviewed and tested it and marked it as ready for committer.
Here are the review comments:
+ <term><option>-r</option></term> + <term><option>--max-rate</option></term>You need to add something like <replaceable
class="parameter">rate</replaceable>.+ The purpose is to limit impact of <application>pg_basebackup</application> + on a running master server.s/"master server"/"server" because we can take a backup from also the standby.
I think that it's better to document the default value and the accepted range of
the rate that we can specify.You need to change the protocol.sgml because you changed BASE_BACKUP
replication command.+ printf(_(" -r, --max-rate maximum transfer rate to
transfer data directory\n"));You need to add something like =RATE just after --max-rate.
+ result = strtol(src, &after_num, 0);
errno should be set to 0 just before calling strtol().
+ if (errno_copy == ERANGE || result != (uint64) ((uint32) result)) + { + fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src); + exit(1); + }We can move this check after the check of "src == after_num" like
parse_int() in guc.c does.
If we do this, the local variable 'errno_copy' is no longer necessary.I think that it's better to output the hint message like "Valid units for
the transfer rate are \"k\" and \"M\"." when a user specified wrong unit.+ /* + * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the + * longest possible time to sleep. Thus the cast to long is safe. + */ + pg_usleep((long) sleep);It's better to use the latch here so that we can interrupt immediately.
Regards,
Attachments:
backup_throttling_v5.patchtext/x-patch; name=backup_throttling_v5.patchDownload
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 0b2e60e..2f24fff 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1719,7 +1719,7 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>]</term>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal>]</term>
<listitem>
<para>
Instructs the server to start streaming a base backup.
@@ -1787,7 +1787,23 @@ The commands accepted in walsender mode are:
the waiting and the warning, leaving the client responsible for
ensuring the required log is available.
</para>
- </listitem>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><literal>MAX_RATE</literal></term>
+ <listitem>
+ <para>
+ Limit the maximum amount of data transferred to client per unit of
+ time. The expected unit is bytes per second. If
+ <literal>MAX_RATE</literal> is passed, it must be either equal to
+ zero or fall to range 32 kB through 1 GB (inclusive). If zero is
+ passed or the option is not passed at all, no restriction is imposed
+ on the transfer.
+ </para>
+ <para>
+ <literal>MAX_RATE</literal> does not affect WAL streaming.
+ </para>
+ </listitem>
</varlistentry>
</variablelist>
</para>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index c379df5..caede77 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,28 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r <replaceable class="parameter">rate</replaceable></option></term>
+ <term><option>--max-rate=<replaceable class="parameter">rate</replaceable></option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit impact of <application>pg_basebackup</application>
+ on running server.
+ </para>
+ <para>
+ This option always affects transfer of the data directory. Transfer of
+ WAL files is only affected if the collection method is <literal>fetch</literal>.
+ </para>
+ <para>
+ Value between <literal>32 kB</literal> and <literal>1024 MB</literal>
+ is expected. Suffixes <literal>k</literal> (kilobytes) and
+ <literal>M</literal> (megabytes) are accepted. For example:
+ <literal>10M</literal>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index ba8d173..e3ff2cf 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -30,9 +30,11 @@
#include "replication/walsender_private.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+#include "storage/latch.h"
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +44,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +62,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +72,42 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+/*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+#define THROTTLING_SAMPLE_MIN 32768
+
+/* The maximum number of checks per second. */
+#define THROTTLING_MAX_FREQUENCY 128
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
+static Latch throttling_latch;
+
typedef struct
{
char *oid;
@@ -171,6 +211,35 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per second. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+
+ InitLatch(&throttling_latch);
+ }
+ else
+ {
+ /* Disable throttling. */
+ throttling_counter = -1;
+ }
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -398,6 +467,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
+
if (len == XLogSegSize)
break;
}
@@ -468,6 +539,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -519,6 +591,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1019,6 +1114,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1040,10 +1136,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1069,3 +1169,52 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+ {
+ ResetLatch(&throttling_latch);
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ WaitLatch(&throttling_latch, WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ (long) (sleep / 1000));
+ }
+ else
+ {
+
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+ }
+
+ /*
+ * Only the whole multiples of throttling_sample processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 8c83780..1c2c31c 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 3d930f1..b2d5e3b 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 6706c0c..997b48ee 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -46,6 +46,7 @@ bool streamwal = false;
bool fastcheckpoint = false;
bool writerecoveryconf = false;
int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+uint32 maxrate = 0; /* No limit by default. */
/* Progress counters */
static uint64 totalsize;
@@ -76,6 +77,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -111,6 +113,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate=RATE maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -476,6 +479,91 @@ progress_report(int tablespacenum, const char *filename)
}
+static uint32
+parse_max_rate(char *src)
+{
+ int factor;
+ char *after_num;
+ int64 result;
+
+ errno = 0;
+ result = strtol(src, &after_num, 0);
+ if (src == after_num)
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" is not a valid integer value\n"), progname, src);
+ exit(1);
+ }
+ if (errno == ERANGE)
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+
+
+ /*
+ * Evaluate (optional) suffix.
+ *
+ * after_num should now be right behind the numeric value.
+ */
+ factor = 1;
+ switch (*after_num)
+ {
+ /*
+ * Only the following suffixes are allowed. It's not too useful to
+ * restrict the rate to gigabytes: such a rate will probably bring
+ * significant impact on the master anyway, so the throttling
+ * won't help much.
+ */
+ case 'M':
+ factor <<= 10;
+ case 'k':
+ factor <<= 10;
+ after_num++;
+ break;
+
+ default:
+
+ /*
+ * If there's no suffix, byte is the unit. Possible invalid letter
+ * will be detected later.
+ */
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr, _("%s: valid units for the transfer rate are \"k\" and \"M\"\n"), progname);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer rate must be greater than zero\n"), progname);
+ exit(1);
+ }
+ if (factor > 1)
+ {
+ result *= factor;
+ /* Check the integer range once again. */
+ if (result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr, _("%s: transfer rate \"%s\" exceeds integer range\n"), progname, src);
+ exit(1);
+ }
+ }
+ return (uint32) result;
+}
+
+
/*
* Write a piece of tar data
*/
@@ -1310,6 +1398,7 @@ BaseBackup(void)
uint32 starttli;
char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
+ char maxrate_clause[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
@@ -1382,13 +1471,18 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
+ if (maxrate > 0)
+ snprintf(maxrate_clause, sizeof(maxrate_clause), "MAX_RATE %u", maxrate);
+ else
+ maxrate_clause[0] = '\0';
snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
+ "BASE_BACKUP LABEL '%s' %s %s %s %s %s",
escaped_label,
showprogress ? "PROGRESS" : "",
includewal && !streamwal ? "WAL" : "",
fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
+ includewal ? "NOWAIT" : "",
+ maxrate_clause);
if (PQsendQuery(conn, current_path) == 0)
{
@@ -1657,6 +1751,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1697,7 +1792,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1718,6 +1813,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Antonin Houska escribi�:
Thanks for checking. The new version addresses your findings.
I gave this patch a look. There was a bug that the final bounds check
for int32 range was not done when there was no suffix, so in effect you
could pass numbers larger than UINT_MAX and pg_basebackup would not
complain until the number reached the server via BASE_BACKUP. Maybe
that's fine, given that numbers above 1G will cause a failure on the
server side anyway, but it looked like a bug to me. I tweaked the parse
routine slightly; other than fix the bug, I made it accept fractional
numbers, so you can say 0.5M for instance.
Perhaps we should make sure pg_basebackup rejects numbers larger than 1G
as well.
Another thing I found a bit strange was the use of the latch. What this
patch does is create a separate latch which is used for the throttling.
This means that if the walsender process receives a signal, it will not
wake up if it's sleeping in throttling. Perhaps this is okay: as Andres
was quoted upthread as saying, maybe this is not a problem because the
sleep times are typically short anyway. But we're pretty much used to
the idea that whenever a signal is sent, processes act on it
*immediately*. Maybe some admin will not feel comfortable about waiting
some extra 20ms when they cancel their base backups. In any case,
having a secondary latch to sleep on in a process seems weird. Maybe
this should be using MyWalSnd->latch somehow.
You have this interesting THROTTLING_MAX_FREQUENCY constant defined to
128, with the comment "check this many times per second".
Let's see: if the user requests 1MB/s, this value results in
throttling_sample = 1MB / 128 = 8192. So for every 8kB transferred, we
would stop, check the wall clock time, and if less time has lapsed than
we were supposed to spend transferring those 8kB then we sleep. Isn't a
check every 8kB a bit too frequent? This doesn't seem sensible to me.
I think we should be checking perhaps every tenth of the requested
maximum rate, or something like that, not every 1/128th.
Now, what the code actually does is not necessarily that, because the
sampling value is clamped to a minimum of 32 kB. But then if we're
going to do that, why use such a large divisor value in the first place?
I propose we set that constant to a smaller value such as 8.
Other thoughts?
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
Attachments:
backup_throttling_v6.patchtext/x-diff; charset=us-asciiDownload
*** a/doc/src/sgml/protocol.sgml
--- b/doc/src/sgml/protocol.sgml
***************
*** 1719,1725 **** The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
! <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>]</term>
<listitem>
<para>
Instructs the server to start streaming a base backup.
--- 1719,1725 ----
</varlistentry>
<varlistentry>
! <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal>]</term>
<listitem>
<para>
Instructs the server to start streaming a base backup.
***************
*** 1787,1793 **** The commands accepted in walsender mode are:
the waiting and the warning, leaving the client responsible for
ensuring the required log is available.
</para>
! </listitem>
</varlistentry>
</variablelist>
</para>
--- 1787,1809 ----
the waiting and the warning, leaving the client responsible for
ensuring the required log is available.
</para>
! </listitem>
! </varlistentry>
! <varlistentry>
! <term><literal>MAX_RATE</literal> <replaceable>rate</></term>
! <listitem>
! <para>
! Limit (throttle) the maximum amount of data transferred from server
! to client per unit of time. The expected unit is bytes per second.
! If this option is specified, the value must either be equal to zero
! or it must fall within the range from 32 kB through 1 GB (inclusive).
! If zero is passed or the option is not specified, no restriction is
! imposed on the transfer.
! </para>
! <para>
! <literal>MAX_RATE</literal> does not affect WAL streaming.
! </para>
! </listitem>
</varlistentry>
</variablelist>
</para>
*** a/doc/src/sgml/ref/pg_basebackup.sgml
--- b/doc/src/sgml/ref/pg_basebackup.sgml
***************
*** 189,194 **** PostgreSQL documentation
--- 189,215 ----
</varlistentry>
<varlistentry>
+ <term><option>-r <replaceable class="parameter">rate</replaceable></option></term>
+ <term><option>--max-rate=<replaceable class="parameter">rate</replaceable></option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit the impact of <application>pg_basebackup</application>
+ on the running server.
+ </para>
+ <para>
+ This option always affects transfer of the data directory. Transfer of
+ WAL files is only affected if the collection method is <literal>fetch</literal>.
+ </para>
+ <para>
+ Valid values are between <literal>32 kB</literal> and <literal>1024 MB</literal>
+ is expected. Suffixes <literal>k</literal> (kilobytes) and
+ <literal>M</literal> (megabytes) are accepted.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
*** a/src/backend/replication/basebackup.c
--- b/src/backend/replication/basebackup.c
***************
*** 30,38 ****
--- 30,40 ----
#include "replication/walsender_private.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+ #include "storage/latch.h"
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+ #include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
***************
*** 42,47 **** typedef struct
--- 44,50 ----
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
***************
*** 59,64 **** static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
--- 62,68 ----
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+ static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
***************
*** 68,73 **** static bool backup_started_in_recovery = false;
--- 72,115 ----
*/
#define TAR_SEND_SIZE 32768
+
+ /*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+ #define MAX_RATE_LOWER 32768
+ #define MAX_RATE_UPPER (1024 << 20)
+
+ /*
+ * Transfer rate is only measured when this number of bytes has been sent.
+ * (Too frequent checks would impose too high CPU overhead.)
+ *
+ * The default value is used unless it'd result in too frequent checks.
+ */
+ #define THROTTLING_SAMPLE_MIN 32768
+
+ /*
+ * How frequently to throttle, as a fraction of the specified rate-second.
+ */
+ #define THROTTLING_MAX_FREQUENCY 8
+
+ /* The actual value, transfer of which may cause sleep. */
+ static uint32 throttling_sample;
+
+ /* Amount of data already transfered but not yet throttled. */
+ static int32 throttling_counter;
+
+ /* The minimum time required to transfer throttling_sample bytes. */
+ static int64 elapsed_min_unit;
+
+ /* The last check of the transfer rate. */
+ static int64 throttled_last;
+
+ static Latch throttling_latch;
+
typedef struct
{
char *oid;
***************
*** 187,192 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
--- 229,264 ----
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ /* Setup and activate network throttling, if client requested it */
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_MAX_FREQUENCY;
+
+ /* Don't measure too small pieces of data. */
+ if (throttling_sample < THROTTLING_SAMPLE_MIN)
+ throttling_sample = THROTTLING_SAMPLE_MIN;
+
+ /*
+ * opt->maxrate is bytes per second. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+
+ InitLatch(&throttling_latch);
+ }
+ else
+ {
+ /* Disable throttling. */
+ throttling_counter = -1;
+ }
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
***************
*** 414,419 **** perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
--- 486,493 ----
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
+
if (len == XLogSegSize)
break;
}
***************
*** 484,489 **** parse_basebackup_options(List *options, basebackup_options *opt)
--- 558,564 ----
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
***************
*** 535,540 **** parse_basebackup_options(List *options, basebackup_options *opt)
--- 610,638 ----
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
***************
*** 1071,1076 **** sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
--- 1169,1175 ----
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
***************
*** 1092,1101 **** sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
}
}
! /* Pad to 512 byte boundary, per tar format requirements */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
--- 1191,1204 ----
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
! /*
! * Pad to 512 byte boundary, per tar format requirements. (This small
! * piece of data is probably not worth throttling.)
! */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
***************
*** 1121,1123 **** _tarWriteHeader(const char *filename, const char *linktarget,
--- 1224,1279 ----
pq_putmessage('d', h, 512);
}
+
+ /*
+ * Increment the network transfer counter by the given number of bytes,
+ * and sleep if necessary to comply with the requested network transfer
+ * rate.
+ */
+ static void
+ throttle(size_t increment)
+ {
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+ {
+ ResetLatch(&throttling_latch);
+ /*
+ * THROTTLING_SAMPLE_MIN / MAX_RATE_LOWER (in seconds) should be the
+ * longest possible time to sleep. Thus the cast to long is safe.
+ */
+ WaitLatch(&throttling_latch, WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ (long) (sleep / 1000));
+ }
+ else
+ {
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+ }
+
+ /*
+ * Only a whole multiple of throttling_sample was processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+ /* Once the (possible) sleep ends, new period starts. */
+ throttled_last += elapsed + sleep;
+ }
*** a/src/backend/replication/repl_gram.y
--- b/src/backend/replication/repl_gram.y
***************
*** 78,83 **** Node *replication_parse_result;
--- 78,84 ----
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+ %token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
***************
*** 116,122 **** identify_system:
;
/*
! * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
--- 117,123 ----
;
/*
! * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
***************
*** 156,161 **** base_backup_opt:
--- 157,167 ----
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
*** a/src/backend/replication/repl_scanner.l
--- b/src/backend/replication/repl_scanner.l
***************
*** 71,76 **** IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
--- 71,77 ----
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+ MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
*** a/src/bin/pg_basebackup/pg_basebackup.c
--- b/src/bin/pg_basebackup/pg_basebackup.c
***************
*** 46,51 **** static bool streamwal = false;
--- 46,52 ----
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+ static uint32 maxrate = 0; /* no limit by default */
/* Progress counters */
static uint64 totalsize;
***************
*** 76,81 **** static PQExpBuffer recoveryconfcontents = NULL;
--- 77,83 ----
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+ static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
***************
*** 111,116 **** usage(void)
--- 113,119 ----
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate=RATE maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
***************
*** 475,480 **** progress_report(int tablespacenum, const char *filename)
--- 478,563 ----
fprintf(stderr, "\r");
}
+ static uint32
+ parse_max_rate(char *src)
+ {
+ double factor;
+ double result;
+ char *after_num;
+
+ errno = 0;
+ result = strtod(src, &after_num);
+ if (src == after_num)
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" is not a valid value\n"),
+ progname, src);
+ exit(1);
+ }
+ if (errno != 0)
+ {
+ fprintf(stderr,
+ _("%s: invalid transfer rate \"%s\": %s\n"),
+ progname, src, strerror(errno));
+ exit(1);
+ }
+
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer rate must be greater than zero\n"),
+ progname);
+ exit(1);
+ }
+
+ /*
+ * Evaluate (optional) suffix after skipping over possible whitespace.
+ */
+ factor = 1.0;
+ while (*after_num != '\0' && isspace(*after_num))
+ after_num++;
+ switch (*after_num)
+ {
+ case 'M':
+ factor = 1048576.0;
+ after_num++;
+ break;
+ case 'k':
+ factor = 1024.0;
+ after_num++;
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr,
+ _("%s: invalid --max-rate units: \"%s\"\n"),
+ progname, after_num);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ if (factor > 1)
+ result *= factor;
+
+ /* Check the integer range */
+ if ((uint64) result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" exceeds integer range\n"),
+ progname, src);
+ exit(1);
+ }
+
+ return (uint32) result;
+ }
/*
* Write a piece of tar data
***************
*** 1308,1315 **** BaseBackup(void)
char *sysidentifier;
uint32 latesttli;
uint32 starttli;
! char current_path[MAXPGPATH];
char escaped_label[MAXPGPATH];
int i;
char xlogstart[64];
char xlogend[64];
--- 1391,1399 ----
char *sysidentifier;
uint32 latesttli;
uint32 starttli;
! char *basebkp;
char escaped_label[MAXPGPATH];
+ char *maxrate_clause = NULL;
int i;
char xlogstart[64];
char xlogend[64];
***************
*** 1382,1396 **** BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
! snprintf(current_path, sizeof(current_path),
! "BASE_BACKUP LABEL '%s' %s %s %s %s",
! escaped_label,
! showprogress ? "PROGRESS" : "",
! includewal && !streamwal ? "WAL" : "",
! fastcheckpoint ? "FAST" : "",
! includewal ? "NOWAIT" : "");
!
! if (PQsendQuery(conn, current_path) == 0)
{
fprintf(stderr, _("%s: could not send replication command \"%s\": %s"),
progname, "BASE_BACKUP", PQerrorMessage(conn));
--- 1466,1485 ----
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
!
! if (maxrate > 0)
! maxrate_clause = psprintf("MAX_RATE %u", maxrate);
!
! basebkp =
! psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
! escaped_label,
! showprogress ? "PROGRESS" : "",
! includewal && !streamwal ? "WAL" : "",
! fastcheckpoint ? "FAST" : "",
! includewal ? "NOWAIT" : "",
! maxrate_clause ? maxrate_clause : "");
!
! if (PQsendQuery(conn, basebkp) == 0)
{
fprintf(stderr, _("%s: could not send replication command \"%s\": %s"),
progname, "BASE_BACKUP", PQerrorMessage(conn));
***************
*** 1657,1662 **** main(int argc, char **argv)
--- 1746,1752 ----
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
***************
*** 1697,1703 **** main(int argc, char **argv)
}
}
! while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
--- 1787,1793 ----
}
}
! while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
***************
*** 1718,1723 **** main(int argc, char **argv)
--- 1808,1816 ----
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Alvaro Herrera escribi�:
Antonin Houska escribi�:
Thanks for checking. The new version addresses your findings.
I gave this patch a look.
BTW I also moved the patch the commitfest currently running, and set it
waiting-on-author.
Your move.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hi,
On 2014-01-15 18:52:32 -0300, Alvaro Herrera wrote:
Another thing I found a bit strange was the use of the latch. What this
patch does is create a separate latch which is used for the throttling.
This means that if the walsender process receives a signal, it will not
wake up if it's sleeping in throttling. Perhaps this is okay: as Andres
was quoted upthread as saying, maybe this is not a problem because the
sleep times are typically short anyway. But we're pretty much used to
the idea that whenever a signal is sent, processes act on it
*immediately*. Maybe some admin will not feel comfortable about waiting
some extra 20ms when they cancel their base backups. In any case,
having a secondary latch to sleep on in a process seems weird. Maybe
this should be using MyWalSnd->latch somehow.
Yes, this definitely should reuse MyWalSnd->latch.
slightly related: we should start to reuse procLatch for walsenders
instead of having a separate latch someday.
Greetings,
Andres Freund
--
Andres Freund http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Jan 16, 2014 at 12:03 PM, Andres Freund <andres@2ndquadrant.com> wrote:
slightly related: we should start to reuse procLatch for walsenders
instead of having a separate latch someday.
+1. The potential for bugs from failing to account for this within
signal handlers seems like a concern. I think that every process
should use the process latch, except for the archiver which uses a
local latch because it pointedly does not touch shared memory. I think
I wrote a comment that made it into the latch header comments
encouraging this, but never saw to it that it was universally adhered
to.
--
Peter Geoghegan
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Fri, Jan 17, 2014 at 5:03 AM, Andres Freund <andres@2ndquadrant.com> wrote:
slightly related: we should start to reuse procLatch for walsenders
instead of having a separate latch someday.
+ 1 on that.
--
Michael
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 01/15/2014 10:52 PM, Alvaro Herrera wrote:
I gave this patch a look. There was a bug that the final bounds check
for int32 range was not done when there was no suffix, so in effect you
could pass numbers larger than UINT_MAX and pg_basebackup would not
complain until the number reached the server via BASE_BACKUP. Maybe
that's fine, given that numbers above 1G will cause a failure on the
server side anyway, but it looked like a bug to me. I tweaked the parse
routine slightly; other than fix the bug, I made it accept fractional
numbers, so you can say 0.5M for instance.
Thanks.
Perhaps we should make sure pg_basebackup rejects numbers larger than 1G
as well.
Is there a good place to define the constant, so that both backend and
client can use it? I'd say 'include/common' but no existing file seems
to be appropriate. I'm not sure if it's worth to add a new file there.
Another thing I found a bit strange was the use of the latch. What this
patch does is create a separate latch which is used for the throttling.
This means that if the walsender process receives a signal, it will not
wake up if it's sleeping in throttling. Perhaps this is okay: as Andres
was quoted upthread as saying, maybe this is not a problem because the
sleep times are typically short anyway. But we're pretty much used to
the idea that whenever a signal is sent, processes act on it
*immediately*. Maybe some admin will not feel comfortable about waiting
some extra 20ms when they cancel their base backups. In any case,
having a secondary latch to sleep on in a process seems weird. Maybe
this should be using MyWalSnd->latch somehow.
o.k., MyWalSnd->latch is used now.
You have this interesting THROTTLING_MAX_FREQUENCY constant defined to
128, with the comment "check this many times per second".
Let's see: if the user requests 1MB/s, this value results in
throttling_sample = 1MB / 128 = 8192. So for every 8kB transferred, we
would stop, check the wall clock time, and if less time has lapsed than
we were supposed to spend transferring those 8kB then we sleep. Isn't a
check every 8kB a bit too frequent? This doesn't seem sensible to me.
I think we should be checking perhaps every tenth of the requested
maximum rate, or something like that, not every 1/128th.Now, what the code actually does is not necessarily that, because the
sampling value is clamped to a minimum of 32 kB. But then if we're
going to do that, why use such a large divisor value in the first place?
I propose we set that constant to a smaller value such as 8.
I tried to use THROTTLING_SAMPLE_MIN and THROTTLING_MAX_FREQUENCY to
control both the minimum and maximum chunk size. It was probably too
generic, THROTTLING_SAMPLE_MIN is no longer there.
New patch version is attached.
// Antonin Houska (Tony)
Attachments:
backup_throttling_v7.patchtext/x-patch; name=backup_throttling_v7.patchDownload
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 7d99976..799d214 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1720,7 +1720,7 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>]</term>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal>]</term>
<listitem>
<para>
Instructs the server to start streaming a base backup.
@@ -1788,7 +1788,23 @@ The commands accepted in walsender mode are:
the waiting and the warning, leaving the client responsible for
ensuring the required log is available.
</para>
- </listitem>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><literal>MAX_RATE</literal> <replaceable>rate</></term>
+ <listitem>
+ <para>
+ Limit (throttle) the maximum amount of data transferred from server
+ to client per unit of time. The expected unit is bytes per second.
+ If this option is specified, the value must either be equal to zero
+ or it must fall within the range from 32 kB through 1 GB (inclusive).
+ If zero is passed or the option is not specified, no restriction is
+ imposed on the transfer.
+ </para>
+ <para>
+ <literal>MAX_RATE</literal> does not affect WAL streaming.
+ </para>
+ </listitem>
</varlistentry>
</variablelist>
</para>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index c379df5..2ec81b7 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,27 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r <replaceable class="parameter">rate</replaceable></option></term>
+ <term><option>--max-rate=<replaceable class="parameter">rate</replaceable></option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit the impact of <application>pg_basebackup</application>
+ on the running server.
+ </para>
+ <para>
+ This option always affects transfer of the data directory. Transfer of
+ WAL files is only affected if the collection method is <literal>fetch</literal>.
+ </para>
+ <para>
+ Valid values are between <literal>32 kB</literal> and <literal>1024 MB</literal>
+ is expected. Suffixes <literal>k</literal> (kilobytes) and
+ <literal>M</literal> (megabytes) are accepted.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fc35f5b..a0216c1 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -42,6 +43,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -59,6 +61,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -68,6 +71,35 @@ static bool backup_started_in_recovery = false;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data per second - bounds of the user input.
+ *
+ * If the maximum should be increased to more than 4 GB, uint64 must
+ * be introduced for the related variables. However such high values have
+ * little to do with throttling.
+ */
+#define MAX_RATE_LOWER 32768
+#define MAX_RATE_UPPER (1024 << 20)
+
+
+/*
+ * How frequently to throttle, as a fraction of the specified rate-second.
+ */
+#define THROTTLING_FREQUENCY 8
+
+/* The actual value, transfer of which may cause sleep. */
+static uint32 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int32 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
typedef struct
{
char *oid;
@@ -187,6 +219,30 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ /* Setup and activate network throttling, if client requested it */
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate / THROTTLING_FREQUENCY;
+
+ /*
+ * opt->maxrate is bytes per second. Thus the expression in
+ * brackets is microseconds per byte.
+ */
+ elapsed_min_unit = throttling_sample *
+ ((double) USECS_PER_SEC / opt->maxrate);
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ {
+ /* Disable throttling. */
+ throttling_counter = -1;
+ }
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -414,6 +470,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
+
if (len == XLogSegSize)
break;
}
@@ -484,6 +542,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -535,6 +594,29 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate > 0 &&
+ (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u bytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER >> 10, MAX_RATE_UPPER >> 10)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1071,6 +1153,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1092,10 +1175,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1121,3 +1208,64 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+/*
+ * Increment the network transfer counter by the given number of bytes,
+ * and sleep if necessary to comply with the requested network transfer
+ * rate.
+ */
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+ int wait_result = 0;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+ {
+ Assert(MyWalSnd);
+ ResetLatch(&MyWalSnd->latch);
+
+ /*
+ * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
+ * the maximum time to sleep. Thus the cast to long is safe.
+ */
+ wait_result = WaitLatch(&MyWalSnd->latch, WL_LATCH_SET | WL_TIMEOUT
+ | WL_POSTMASTER_DEATH, (long) (sleep / 1000));
+ }
+ else
+ {
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+ }
+
+ /*
+ * Only a whole multiple of throttling_sample was processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+
+ /* Once the (possible) sleep has ended, new period starts. */
+ if (wait_result | WL_TIMEOUT)
+ throttled_last += elapsed + sleep;
+ else if (sleep > 0)
+ /* Sleep was necessary but might have been interrupted. */
+ throttled_last = GetCurrentIntegerTimestamp();
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 015aa44..5f1f091 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -78,6 +78,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
@@ -116,7 +117,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -156,6 +157,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/*
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 01e5ac6..74f7a34 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -71,6 +71,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 9d13d57..1d198ad 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -46,6 +46,7 @@ static bool streamwal = false;
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+static uint32 maxrate = 0; /* no limit by default */
/* Progress counters */
static uint64 totalsize;
@@ -76,6 +77,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static uint32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -111,6 +113,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate=RATE maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -475,6 +478,86 @@ progress_report(int tablespacenum, const char *filename)
fprintf(stderr, "\r");
}
+static uint32
+parse_max_rate(char *src)
+{
+ double factor;
+ double result;
+ char *after_num;
+
+ errno = 0;
+ result = strtod(src, &after_num);
+ if (src == after_num)
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" is not a valid value\n"),
+ progname, src);
+ exit(1);
+ }
+ if (errno != 0)
+ {
+ fprintf(stderr,
+ _("%s: invalid transfer rate \"%s\": %s\n"),
+ progname, src, strerror(errno));
+ exit(1);
+ }
+
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here. Exact check of the range to be
+ * done on server.
+ */
+ fprintf(stderr, _("%s: transfer rate must be greater than zero\n"),
+ progname);
+ exit(1);
+ }
+
+ /*
+ * Evaluate (optional) suffix after skipping over possible whitespace.
+ */
+ factor = 1.0;
+ while (*after_num != '\0' && isspace(*after_num))
+ after_num++;
+ switch (*after_num)
+ {
+ case 'M':
+ factor = 1048576.0;
+ after_num++;
+ break;
+ case 'k':
+ factor = 1024.0;
+ after_num++;
+ break;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0')
+ {
+ if (!isspace(*after_num))
+ {
+ fprintf(stderr,
+ _("%s: invalid --max-rate units: \"%s\"\n"),
+ progname, after_num);
+ exit(1);
+ }
+ after_num++;
+ }
+
+ if (factor > 1)
+ result *= factor;
+
+ /* Check the integer range */
+ if ((uint64) result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" exceeds integer range\n"),
+ progname, src);
+ exit(1);
+ }
+
+ return (uint32) result;
+}
/*
* Write a piece of tar data
@@ -1308,8 +1391,9 @@ BaseBackup(void)
char *sysidentifier;
uint32 latesttli;
uint32 starttli;
- char current_path[MAXPGPATH];
+ char *basebkp;
char escaped_label[MAXPGPATH];
+ char *maxrate_clause = NULL;
int i;
char xlogstart[64];
char xlogend[64];
@@ -1382,15 +1466,20 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
- snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
- escaped_label,
- showprogress ? "PROGRESS" : "",
- includewal && !streamwal ? "WAL" : "",
- fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
-
- if (PQsendQuery(conn, current_path) == 0)
+
+ if (maxrate > 0)
+ maxrate_clause = psprintf("MAX_RATE %u", maxrate);
+
+ basebkp =
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ escaped_label,
+ showprogress ? "PROGRESS" : "",
+ includewal && !streamwal ? "WAL" : "",
+ fastcheckpoint ? "FAST" : "",
+ includewal ? "NOWAIT" : "",
+ maxrate_clause ? maxrate_clause : "");
+
+ if (PQsendQuery(conn, basebkp) == 0)
{
fprintf(stderr, _("%s: could not send replication command \"%s\": %s"),
progname, "BASE_BACKUP", PQerrorMessage(conn));
@@ -1657,6 +1746,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1697,7 +1787,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1718,6 +1808,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
I realize the following should be applied on the top of v7:
index a0216c1..16dd939 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1263,7 +1263,7 @@ throttle(size_t increment)
throttling_counter %= throttling_sample;
/* Once the (possible) sleep has ended, new period starts. */
- if (wait_result | WL_TIMEOUT)
+ if (wait_result & WL_TIMEOUT)
throttled_last += elapsed + sleep;
else if (sleep > 0)
/* Sleep was necessary but might have been interrupted. */
// Tony
On 01/20/2014 05:10 PM, Antonin Houska wrote:
On 01/15/2014 10:52 PM, Alvaro Herrera wrote:
I gave this patch a look. There was a bug that the final bounds check
for int32 range was not done when there was no suffix, so in effect you
could pass numbers larger than UINT_MAX and pg_basebackup would not
complain until the number reached the server via BASE_BACKUP. Maybe
that's fine, given that numbers above 1G will cause a failure on the
server side anyway, but it looked like a bug to me. I tweaked the parse
routine slightly; other than fix the bug, I made it accept fractional
numbers, so you can say 0.5M for instance.Thanks.
Perhaps we should make sure pg_basebackup rejects numbers larger than 1G
as well.Is there a good place to define the constant, so that both backend and
client can use it? I'd say 'include/common' but no existing file seems
to be appropriate. I'm not sure if it's worth to add a new file there.Another thing I found a bit strange was the use of the latch. What this
patch does is create a separate latch which is used for the throttling.
This means that if the walsender process receives a signal, it will not
wake up if it's sleeping in throttling. Perhaps this is okay: as Andres
was quoted upthread as saying, maybe this is not a problem because the
sleep times are typically short anyway. But we're pretty much used to
the idea that whenever a signal is sent, processes act on it
*immediately*. Maybe some admin will not feel comfortable about waiting
some extra 20ms when they cancel their base backups. In any case,
having a secondary latch to sleep on in a process seems weird. Maybe
this should be using MyWalSnd->latch somehow.o.k., MyWalSnd->latch is used now.
You have this interesting THROTTLING_MAX_FREQUENCY constant defined to
128, with the comment "check this many times per second".
Let's see: if the user requests 1MB/s, this value results in
throttling_sample = 1MB / 128 = 8192. So for every 8kB transferred, we
would stop, check the wall clock time, and if less time has lapsed than
we were supposed to spend transferring those 8kB then we sleep. Isn't a
check every 8kB a bit too frequent? This doesn't seem sensible to me.
I think we should be checking perhaps every tenth of the requested
maximum rate, or something like that, not every 1/128th.Now, what the code actually does is not necessarily that, because the
sampling value is clamped to a minimum of 32 kB. But then if we're
going to do that, why use such a large divisor value in the first place?
I propose we set that constant to a smaller value such as 8.I tried to use THROTTLING_SAMPLE_MIN and THROTTLING_MAX_FREQUENCY to
control both the minimum and maximum chunk size. It was probably too
generic, THROTTLING_SAMPLE_MIN is no longer there.New patch version is attached.
// Antonin Houska (Tony)
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Jan 21, 2014 at 1:10 AM, Antonin Houska
<antonin.houska@gmail.com> wrote:
On 01/15/2014 10:52 PM, Alvaro Herrera wrote:
I gave this patch a look. There was a bug that the final bounds check
for int32 range was not done when there was no suffix, so in effect you
could pass numbers larger than UINT_MAX and pg_basebackup would not
complain until the number reached the server via BASE_BACKUP. Maybe
that's fine, given that numbers above 1G will cause a failure on the
server side anyway, but it looked like a bug to me. I tweaked the parse
routine slightly; other than fix the bug, I made it accept fractional
numbers, so you can say 0.5M for instance.Thanks.
Perhaps we should make sure pg_basebackup rejects numbers larger than 1G
as well.Is there a good place to define the constant, so that both backend and
client can use it? I'd say 'include/common' but no existing file seems
to be appropriate. I'm not sure if it's worth to add a new file there.
If there is no good place to define them, it's okay to define them
also in client side
for now.
+ <term>BASE_BACKUP [<literal>LABEL</literal>
<replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>]
[<literal>FAST</literal>] [<literal>WAL</literal>]
[<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal>]</term>
It's better to add something like <replaceable>'rate'</replaceable> just after
<literal>MAX_RATE</literal>.
+ <para>
+ <literal>MAX_RATE</literal> does not affect WAL streaming.
+ </para>
I don't think that this paragraph is required here because BASE_BACKUP is
basically independent from WAL streaming.
Why did you choose "bytes per second" as a valid rate which we can specify?
Since the minimum rate is 32kB, isn't it better to use "KB per second" for that?
If we do that, we can easily increase the maximum rate from 1GB to very large
number in the future if required.
+ wait_result = WaitLatch(&MyWalSnd->latch, WL_LATCH_SET | WL_TIMEOUT
+ | WL_POSTMASTER_DEATH, (long) (sleep / 1000));
If WL_POSTMASTER_DEATH is triggered, we should exit immediately like
other process does? This is not a problem of this patch. This problem exists
also in current master. But ISTM it's better to solve that together. Thought?
Regards,
--
Fujii Masao
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 01/31/2014 06:26 AM, Fujii Masao wrote:
Is there a good place to define the constant, so that both backend and
client can use it? I'd say 'include/common' but no existing file seems
to be appropriate. I'm not sure if it's worth to add a new file there.If there is no good place to define them, it's okay to define them
also in client side
for now.
+ <term>BASE_BACKUP [<literal>LABEL</literal>
<replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>]
[<literal>FAST</literal>] [<literal>WAL</literal>]
[<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal>]</term>It's better to add something like <replaceable>'rate'</replaceable> just after
<literal>MAX_RATE</literal>.+ <para> + <literal>MAX_RATE</literal> does not affect WAL streaming. + </para>I don't think that this paragraph is required here because BASE_BACKUP is
basically independent from WAL streaming.Why did you choose "bytes per second" as a valid rate which we can specify?
Since the minimum rate is 32kB, isn't it better to use "KB per second" for that?
If we do that, we can easily increase the maximum rate from 1GB to very large
number in the future if required.
The attached version addresses all the comments above.
+ wait_result = WaitLatch(&MyWalSnd->latch, WL_LATCH_SET | WL_TIMEOUT + | WL_POSTMASTER_DEATH, (long) (sleep / 1000));If WL_POSTMASTER_DEATH is triggered, we should exit immediately like
other process does? This is not a problem of this patch. This problem exists
also in current master. But ISTM it's better to solve that together. Thought?
Once we're careful about not missing signals, I think PM death should be
noticed too. The backup functionality itself would probably manage to
finish without postmaster, however it's executed under walsender process.
Question is where !PostmasterIsAlive() check should be added. I think it
should go to the main loop of perform_base_backup(), but that's probably
not in the scope of this patch.
Do you think that my patch should only add a comment like "Don't forget
to set WL_POSTMASTER_DEATH flag when making basebackup.c sensitive to PM
death?"
// Antonin Houska (Tony)
Attachments:
backup_throttling_v8.patchtext/x-patch; name=backup_throttling_v8.patchDownload
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 832524e..704b653 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1772,7 +1772,7 @@ The commands accepted in walsender mode are:
</varlistentry>
<varlistentry>
- <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>]</term>
+ <term>BASE_BACKUP [<literal>LABEL</literal> <replaceable>'label'</replaceable>] [<literal>PROGRESS</literal>] [<literal>FAST</literal>] [<literal>WAL</literal>] [<literal>NOWAIT</literal>] [<literal>MAX_RATE</literal> <replaceable>'rate'</replaceable>]</term>
<listitem>
<para>
Instructs the server to start streaming a base backup.
@@ -1840,7 +1840,20 @@ The commands accepted in walsender mode are:
the waiting and the warning, leaving the client responsible for
ensuring the required log is available.
</para>
- </listitem>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><literal>MAX_RATE</literal> <replaceable>rate</></term>
+ <listitem>
+ <para>
+ Limit (throttle) the maximum amount of data transferred from server
+ to client per unit of time. The expected unit is kilobytes per second.
+ If this option is specified, the value must either be equal to zero
+ or it must fall within the range from 32 kB through 1 GB (inclusive).
+ If zero is passed or the option is not specified, no restriction is
+ imposed on the transfer.
+ </para>
+ </listitem>
</varlistentry>
</variablelist>
</para>
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index c379df5..f8866db 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -189,6 +189,27 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>-r <replaceable class="parameter">rate</replaceable></option></term>
+ <term><option>--max-rate=<replaceable class="parameter">rate</replaceable></option></term>
+ <listitem>
+ <para>
+ The maximum amount of data transferred from server per second.
+ The purpose is to limit the impact of <application>pg_basebackup</application>
+ on the running server.
+ </para>
+ <para>
+ This option always affects transfer of the data directory. Transfer of
+ WAL files is only affected if the collection method is <literal>fetch</literal>.
+ </para>
+ <para>
+ Valid values are between <literal>32 kB</literal> and <literal>1024 MB</literal>.
+ Suffixes <literal>k</literal> (kilobytes) and <literal>M</literal>
+ (megabytes) are accepted.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-R</option></term>
<term><option>--write-recovery-conf</option></term>
<listitem>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 06e54bc..1395542 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -34,6 +34,7 @@
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/ps_status.h"
+#include "utils/timestamp.h"
#include "pgtar.h"
typedef struct
@@ -43,6 +44,7 @@ typedef struct
bool fastcheckpoint;
bool nowait;
bool includewal;
+ uint32 maxrate;
} basebackup_options;
@@ -60,6 +62,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int compareWalFileNames(const void *a, const void *b);
+static void throttle(size_t increment);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;
@@ -72,6 +75,31 @@ static char *statrelpath = NULL;
*/
#define TAR_SEND_SIZE 32768
+
+/*
+ * The maximum amount of data (kB per second) - bounds of the user input.
+ */
+#define MAX_RATE_LOWER 32
+#define MAX_RATE_UPPER 1048576
+
+
+/*
+ * How frequently to throttle, as a fraction of the specified rate-second.
+ */
+#define THROTTLING_FREQUENCY 8
+
+/* The actual number of bytes, transfer of which may cause sleep. */
+static uint64 throttling_sample;
+
+/* Amount of data already transfered but not yet throttled. */
+static int64 throttling_counter;
+
+/* The minimum time required to transfer throttling_sample bytes. */
+static int64 elapsed_min_unit;
+
+/* The last check of the transfer rate. */
+static int64 throttled_last;
+
typedef struct
{
char *oid;
@@ -203,6 +231,29 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
/* Send tablespace header */
SendBackupHeader(tablespaces);
+ /* Setup and activate network throttling, if client requested it */
+ if (opt->maxrate > 0)
+ {
+ throttling_sample = opt->maxrate * 1024 / THROTTLING_FREQUENCY;
+
+ /*
+ * The minimum amount of time for throttling_sample
+ * bytes to be transfered.
+ */
+ elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
+
+ /* Enable throttling. */
+ throttling_counter = 0;
+
+ /* The 'real data' starts now (header was ignored). */
+ throttled_last = GetCurrentIntegerTimestamp();
+ }
+ else
+ {
+ /* Disable throttling. */
+ throttling_counter = -1;
+ }
+
/* Send off our tablespaces one by one */
foreach(lc, tablespaces)
{
@@ -430,6 +481,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
+
if (len == XLogSegSize)
break;
}
@@ -500,6 +553,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
bool o_fast = false;
bool o_nowait = false;
bool o_wal = false;
+ bool o_maxrate = false;
MemSet(opt, 0, sizeof(*opt));
foreach(lopt, options)
@@ -551,6 +605,28 @@ parse_basebackup_options(List *options, basebackup_options *opt)
opt->includewal = true;
o_wal = true;
}
+ else if (strcmp(defel->defname, "maxrate") == 0)
+ {
+ long maxrate;
+
+ if (o_maxrate)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("duplicate option \"%s\"", defel->defname)));
+ maxrate = intVal(defel->arg);
+
+ opt->maxrate = (uint32) maxrate;
+ if (opt->maxrate < MAX_RATE_LOWER || opt->maxrate > MAX_RATE_UPPER)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("transfer rate %u kilobytes per second is out of range",
+ opt->maxrate),
+ errhint("The accepted range is %u through %u kB per second",
+ MAX_RATE_LOWER, MAX_RATE_UPPER)));
+ }
+ o_maxrate = true;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@@ -1104,6 +1180,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
(errmsg("base backup could not send data, aborting backup")));
len += cnt;
+ throttle(cnt);
if (len >= statbuf->st_size)
{
@@ -1125,10 +1202,14 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
cnt = Min(sizeof(buf), statbuf->st_size - len);
pq_putmessage('d', buf, cnt);
len += cnt;
+ throttle(cnt);
}
}
- /* Pad to 512 byte boundary, per tar format requirements */
+ /*
+ * Pad to 512 byte boundary, per tar format requirements. (This small
+ * piece of data is probably not worth throttling.)
+ */
pad = ((len + 511) & ~511) - len;
if (pad > 0)
{
@@ -1154,3 +1235,64 @@ _tarWriteHeader(const char *filename, const char *linktarget,
pq_putmessage('d', h, 512);
}
+
+/*
+ * Increment the network transfer counter by the given number of bytes,
+ * and sleep if necessary to comply with the requested network transfer
+ * rate.
+ */
+static void
+throttle(size_t increment)
+{
+ int64 elapsed,
+ elapsed_min,
+ sleep;
+ int wait_result = 0;
+
+ if (throttling_counter < 0)
+ return;
+
+ throttling_counter += increment;
+ if (throttling_counter < throttling_sample)
+ return;
+
+ /* Time elapsed since the last measuring (and possible wake up). */
+ elapsed = GetCurrentIntegerTimestamp() - throttled_last;
+ /* How much should have elapsed at minimum? */
+ elapsed_min = elapsed_min_unit * (throttling_counter / throttling_sample);
+ sleep = elapsed_min - elapsed;
+ /* Only sleep if the transfer is faster than it should be. */
+ if (sleep > 0)
+ {
+ Assert(MyWalSnd);
+ ResetLatch(&MyWalSnd->latch);
+
+ /*
+ * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
+ * the maximum time to sleep. Thus the cast to long is safe.
+ */
+ wait_result = WaitLatch(&MyWalSnd->latch, WL_LATCH_SET | WL_TIMEOUT
+ | WL_POSTMASTER_DEATH, (long) (sleep / 1000));
+ }
+ else
+ {
+ /*
+ * The actual transfer rate is below the limit. Negative value would
+ * distort the adjustment of throttled_last.
+ */
+ sleep = 0;
+ }
+
+ /*
+ * Only a whole multiple of throttling_sample was processed. The rest will
+ * be done during the next call of this function.
+ */
+ throttling_counter %= throttling_sample;
+
+ /* Once the (possible) sleep has ended, new period starts. */
+ if (wait_result & WL_TIMEOUT)
+ throttled_last += elapsed + sleep;
+ else if (sleep > 0)
+ /* Sleep was necessary but might have been interrupted. */
+ throttled_last = GetCurrentIntegerTimestamp();
+}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index c3f4a24..ba8f885 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -69,6 +69,7 @@ Node *replication_parse_result;
%token K_PROGRESS
%token K_FAST
%token K_NOWAIT
+%token K_MAX_RATE
%token K_WAL
%token K_TIMELINE
%token K_PHYSICAL
@@ -113,7 +114,7 @@ identify_system:
;
/*
- * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT]
+ * BASE_BACKUP [LABEL '<label>'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE %d]
*/
base_backup:
K_BASE_BACKUP base_backup_opt_list
@@ -157,6 +158,11 @@ base_backup_opt:
$$ = makeDefElem("nowait",
(Node *)makeInteger(TRUE));
}
+ | K_MAX_RATE UCONST
+ {
+ $$ = makeDefElem("maxrate",
+ (Node *)makeInteger($2));
+ }
;
/* CREATE_REPLICATION_SLOT SLOT slot PHYSICAL */
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 24195a5..ca32aa6 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -86,6 +86,7 @@ IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
LABEL { return K_LABEL; }
NOWAIT { return K_NOWAIT; }
PROGRESS { return K_PROGRESS; }
+MAX_RATE { return K_MAX_RATE; }
WAL { return K_WAL; }
TIMELINE { return K_TIMELINE; }
START_REPLICATION { return K_START_REPLICATION; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 9d13d57..0dfb60c 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -46,6 +46,14 @@ static bool streamwal = false;
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+static int32 maxrate = 0; /* no limit by default */
+
+/*
+ * Minimum and maximum values of maxrate (kB per second), to check user input
+ * on client side.
+ */
+#define MAX_RATE_LOWER 32
+#define MAX_RATE_UPPER 1048576
/* Progress counters */
static uint64 totalsize;
@@ -76,6 +84,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
static void usage(void);
static void verify_dir_is_empty_or_create(char *dirname);
static void progress_report(int tablespacenum, const char *filename);
+static int32 parse_max_rate(char *src);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -111,6 +120,7 @@ usage(void)
printf(_("\nOptions controlling the output:\n"));
printf(_(" -D, --pgdata=DIRECTORY receive base backup into directory\n"));
printf(_(" -F, --format=p|t output format (plain (default), tar)\n"));
+ printf(_(" -r, --max-rate=RATE maximum transfer rate to transfer data directory\n"));
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
@@ -475,6 +485,101 @@ progress_report(int tablespacenum, const char *filename)
fprintf(stderr, "\r");
}
+static int32
+parse_max_rate(char *src)
+{
+ double result;
+ char *after_num;
+ char *suffix = NULL;
+
+ errno = 0;
+ result = strtod(src, &after_num);
+ if (src == after_num)
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" is not a valid value\n"),
+ progname, src);
+ exit(1);
+ }
+ if (errno != 0)
+ {
+ fprintf(stderr,
+ _("%s: invalid transfer rate \"%s\": %s\n"),
+ progname, src, strerror(errno));
+ exit(1);
+ }
+
+ if (result <= 0)
+ {
+ /*
+ * Reject obviously wrong values here.
+ */
+ fprintf(stderr, _("%s: transfer rate must be greater than zero\n"),
+ progname);
+ exit(1);
+ }
+
+ /*
+ * Evaluate suffix, after skipping over possible whitespace.
+ */
+ while (*after_num != '\0' && isspace(*after_num))
+ after_num++;
+
+ if (*after_num == '\0')
+ {
+ fprintf(stderr,
+ _("%s: missing --max-rate units\n"), progname);
+ exit(1);
+ }
+
+ suffix = after_num;
+ if (*after_num == 'k')
+ {
+ /* kilobyte is the expected unit. */
+ after_num++;
+ }
+ else if (*after_num == 'M')
+ {
+ after_num++;
+ result *= 1024.0;
+ }
+
+ /* The rest can only consist of white space. */
+ while (*after_num != '\0' && isspace(*after_num))
+ after_num++;
+
+ if (*after_num != '\0')
+ {
+ fprintf(stderr,
+ _("%s: invalid --max-rate units: \"%s\"\n"),
+ progname, suffix);
+ exit(1);
+ }
+
+
+ /* Valid integer? */
+ if ((uint64) result != (uint64) ((uint32) result))
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" exceeds integer range\n"),
+ progname, src);
+ exit(1);
+ }
+
+ /*
+ * The range is checked on server side too, but let's avoid server
+ * connection if non-sensible value has been passed.
+ */
+ if (result < MAX_RATE_LOWER || result > MAX_RATE_UPPER)
+ {
+ fprintf(stderr,
+ _("%s: transfer rate \"%s\" is out of range\n"),
+ progname, src);
+ exit(1);
+ }
+
+ return (int32) result;
+}
/*
* Write a piece of tar data
@@ -1308,8 +1413,9 @@ BaseBackup(void)
char *sysidentifier;
uint32 latesttli;
uint32 starttli;
- char current_path[MAXPGPATH];
+ char *basebkp;
char escaped_label[MAXPGPATH];
+ char *maxrate_clause = NULL;
int i;
char xlogstart[64];
char xlogend[64];
@@ -1382,15 +1488,20 @@ BaseBackup(void)
* Start the actual backup
*/
PQescapeStringConn(conn, escaped_label, label, sizeof(escaped_label), &i);
- snprintf(current_path, sizeof(current_path),
- "BASE_BACKUP LABEL '%s' %s %s %s %s",
- escaped_label,
- showprogress ? "PROGRESS" : "",
- includewal && !streamwal ? "WAL" : "",
- fastcheckpoint ? "FAST" : "",
- includewal ? "NOWAIT" : "");
- if (PQsendQuery(conn, current_path) == 0)
+ if (maxrate > 0)
+ maxrate_clause = psprintf("MAX_RATE %u", maxrate);
+
+ basebkp =
+ psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s",
+ escaped_label,
+ showprogress ? "PROGRESS" : "",
+ includewal && !streamwal ? "WAL" : "",
+ fastcheckpoint ? "FAST" : "",
+ includewal ? "NOWAIT" : "",
+ maxrate_clause ? maxrate_clause : "");
+
+ if (PQsendQuery(conn, basebkp) == 0)
{
fprintf(stderr, _("%s: could not send replication command \"%s\": %s"),
progname, "BASE_BACKUP", PQerrorMessage(conn));
@@ -1657,6 +1768,7 @@ main(int argc, char **argv)
{"pgdata", required_argument, NULL, 'D'},
{"format", required_argument, NULL, 'F'},
{"checkpoint", required_argument, NULL, 'c'},
+ {"max-rate", required_argument, NULL, 'r'},
{"write-recovery-conf", no_argument, NULL, 'R'},
{"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
@@ -1697,7 +1809,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:RxX:l:zZ:d:c:h:p:U:s:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RxX:l:zZ:d:c:h:p:U:s:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -1718,6 +1830,9 @@ main(int argc, char **argv)
exit(1);
}
break;
+ case 'r':
+ maxrate = parse_max_rate(optarg);
+ break;
case 'R':
writerecoveryconf = true;
break;
Antonin Houska escribi�:
Why did you choose "bytes per second" as a valid rate which we can specify?
Since the minimum rate is 32kB, isn't it better to use "KB per second" for that?
If we do that, we can easily increase the maximum rate from 1GB to very large
number in the future if required.The attached version addresses all the comments above.
I pushed this patch with a few further tweaks. In your changes to
address the above point, you made the suffix mandatory in the
pg_basebackup -r option. This seemed a strange restriction, so I
removed it. It seems more user-friendly to me to accept the value as
being expressed in kilobytes per second without requiring the suffix to
be there; the 'k' suffix is then also accepted and has no effect. I
amended the docs to say that also.
If you or others feel strongly about this, we can still tweak it, of
course.
I also moved the min/max #defines to replication/basebackup.h, and
included that file in pg_basebackup.c. This avoids the duplicated
values. That file is okay to be included there.
If WL_POSTMASTER_DEATH is triggered, we should exit immediately like
other process does? This is not a problem of this patch. This problem exists
also in current master. But ISTM it's better to solve that together. Thought?Once we're careful about not missing signals, I think PM death should be
noticed too. The backup functionality itself would probably manage to
finish without postmaster, however it's executed under walsender process.Question is where !PostmasterIsAlive() check should be added. I think it
should go to the main loop of perform_base_backup(), but that's probably
not in the scope of this patch.
Feel free to submit patches about this.
Thanks for your patch, and the numerous reviewers who took part.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 02/27/2014 11:04 PM, Alvaro Herrera wrote:
I pushed this patch with a few further tweaks. In your changes to
address the above point, you made the suffix mandatory in the
pg_basebackup -r option. This seemed a strange restriction, so I
removed it. It seems more user-friendly to me to accept the value as
being expressed in kilobytes per second without requiring the suffix to
be there; the 'k' suffix is then also accepted and has no effect. I
amended the docs to say that also.If you or others feel strongly about this, we can still tweak it, of
course.
I'm used to assume the base unit if there's no suffix, but have no
objections against considering kB as the default. I see you adjusted
documentation too.
I also moved the min/max #defines to replication/basebackup.h, and
included that file in pg_basebackup.c. This avoids the duplicated
values. That file is okay to be included there.
I kept in mind that pg_basebackup.c is not linked to the backend, but
you're right, mere inclusion is something else.
Thanks for your patch, and the numerous reviewers who took part.
Thanks for committing - this is my first patch :-)
// Tony
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers