Add --include-table-data-where option to pg_dump, to export only a subset of table data
Many times I've wanted to export a subset of a database, using some sort of
row filter condition on some of the large tables. E.g. copying a
production database to a staging environment, but with some time series
data only from the past month.
We have the existing options:
--include-table=table (and its -t synonym)
--exclude-table=table
--exclude-table-data=table
I propose a new option:
--include-table-data-where=table:filter_clause
One would use this option as follows:
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'" database_name
The filter_clause is used as the contents of a WHERE clause when querying
the data to generate the COPY statement produced by pg_dump.
I've prepared a proposed patch for this, which is attached. The code
changes are rather straightforward. I did have to add the ability to carry
around an extra pointer-sized object to the simple_list implementation, in
order to allow the filter clause to be associated to the matching oids of
the table pattern. It seemed the best way to augment the existing
simple_list implementation, but change as little as possible elsewhere in
the codebase. (Note that SimpleOidList is actually only used by pg_dump).
Feel free to review and propose any amendments.
Attachments:
pgdump-include-table-data-where-v1.patchapplication/octet-stream; name=pgdump-include-table-data-where-v1.patchDownload
[1mdiff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml[m
[1mindex 50809b4..e02359e 100644[m
[1m--- a/doc/src/sgml/ref/pg_dump.sgml[m
[1m+++ b/doc/src/sgml/ref/pg_dump.sgml[m
[36m@@ -771,6 +771,28 @@[m [mPostgreSQL documentation[m
</varlistentry>[m
[m
<varlistentry>[m
[32m+[m[32m <term><option>--include-table-data-where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>[m
[32m+[m[32m <listitem>[m
[32m+[m[32m <para>[m
[32m+[m[32m When dumping data for any tables matching the <replaceable[m
[32m+[m[32m class="parameter">table</replaceable> pattern, only include rows[m
[32m+[m[32m that meet the <replaceable[m
[32m+[m[32m class="parameter">filter_clause</replaceable> condition.[m
[32m+[m[32m This option is useful when you want to dump only a subset of a particular table.[m
[32m+[m[32m The table pattern is interpreted according to the same rules as for <option>-t</option>.[m
[32m+[m[32m <option>--include-table-data</option> can be given more than once to[m
[32m+[m[32m provide different filters for multiple tables.[m
[32m+[m[32m Note that if multiple options refer to the same table, only the first filter_clause will be applied.[m
[32m+[m[32m If necessary, use quotes in your shell to provide an argument that contains spaces.[m
[32m+[m[32m E.g. --include-table-data-where=mytable:"created_at >= '2018-01-01' AND test = 'f'"[m
[32m+[m[32m </para>[m
[32m+[m[32m <para>[m
[32m+[m[32m To exclude data for all tables in the database, see <option>--schema-only</option>.[m
[32m+[m[32m </para>[m
[32m+[m[32m </listitem>[m
[32m+[m[32m </varlistentry>[m
[32m+[m
[32m+[m[32m <varlistentry>[m
<term><option>--inserts</option></term>[m
<listitem>[m
<para>[m
[1mdiff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c[m
[1mindex d6ceb72..e2cfea1 100644[m
[1m--- a/src/bin/pg_dump/pg_dump.c[m
[1m+++ b/src/bin/pg_dump/pg_dump.c[m
[36m@@ -119,6 +119,8 @@[m [mstatic SimpleOidList schema_exclude_oids = {NULL, NULL};[m
[m
static SimpleStringList table_include_patterns = {NULL, NULL};[m
static SimpleOidList table_include_oids = {NULL, NULL};[m
[32m+[m[32mstatic SimpleStringList tabledata_include_where_patterns = {NULL, NULL};[m
[32m+[m[32mstatic SimpleOidList tabledata_include_where_oids = {NULL, NULL};[m
static SimpleStringList table_exclude_patterns = {NULL, NULL};[m
static SimpleOidList table_exclude_oids = {NULL, NULL};[m
static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};[m
[36m@@ -154,7 +156,8 @@[m [mstatic void expand_schema_name_patterns(Archive *fout,[m
static void expand_table_name_patterns(Archive *fout,[m
SimpleStringList *patterns,[m
SimpleOidList *oids,[m
[31m- bool strict_names);[m
[32m+[m [32m bool strict_names,[m
[32m+[m [32m bool match_data);[m
static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);[m
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);[m
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);[m
[36m@@ -360,6 +363,7 @@[m [mmain(int argc, char **argv)[m
{"enable-row-security", no_argument, &dopt.enable_row_security, 1},[m
{"exclude-table-data", required_argument, NULL, 4},[m
{"if-exists", no_argument, &dopt.if_exists, 1},[m
[32m+[m [32m{"include-table-data-where", required_argument, NULL, 8},[m
{"inserts", no_argument, &dopt.dump_inserts, 1},[m
{"lock-wait-timeout", required_argument, NULL, 2},[m
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},[m
[36m@@ -562,6 +566,10 @@[m [mmain(int argc, char **argv)[m
dosync = false;[m
break;[m
[m
[32m+[m [32mcase 8: /* include table(s) data WHERE clause */[m
[32m+[m [32msimple_string_list_append(&tabledata_include_where_patterns, optarg);[m
[32m+[m [32mbreak;[m
[32m+[m
default:[m
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);[m
exit_nicely(1);[m
[36m@@ -761,17 +769,26 @@[m [mmain(int argc, char **argv)[m
{[m
expand_table_name_patterns(fout, &table_include_patterns,[m
&table_include_oids,[m
[31m- strict_names);[m
[32m+[m [32m strict_names, false);[m
if (table_include_oids.head == NULL)[m
exit_horribly(NULL, "no matching tables were found\n");[m
}[m
[32m+[m [32mif (tabledata_include_where_patterns.head != NULL)[m
[32m+[m [32m{[m
[32m+[m [32mexpand_table_name_patterns(fout, &tabledata_include_where_patterns,[m
[32m+[m [32m &tabledata_include_where_oids,[m
[32m+[m [32m true, /* Always use strict names for WHERE pattern */[m
[32m+[m [32m true); /* Match extra data after ':' character in each argument */[m
[32m+[m [32mif (tabledata_include_where_oids.head == NULL)[m
[32m+[m [32mexit_horribly(NULL, "no matching tables were found for WHERE clause\n");[m
[32m+[m [32m}[m
expand_table_name_patterns(fout, &table_exclude_patterns,[m
&table_exclude_oids,[m
[31m- false);[m
[32m+[m [32m false, false);[m
[m
expand_table_name_patterns(fout, &tabledata_exclude_patterns,[m
&tabledata_exclude_oids,[m
[31m- false);[m
[32m+[m [32m false, false);[m
[m
/* non-matching exclusion patterns aren't an error */[m
[m
[36m@@ -980,6 +997,8 @@[m [mhelp(const char *progname)[m
" access to)\n"));[m
printf(_(" --exclude-table-data=TABLE do NOT dump data for the named table(s)\n"));[m
printf(_(" --if-exists use IF EXISTS when dropping objects\n"));[m
[32m+[m [32mprintf(_(" --include-table-data-where=TABLE:WHERE_CLAUSE\n"));[m
[32m+[m [32mprintf(_(" only dump selected rows for the given table(s)\n"));[m
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));[m
printf(_(" --no-comments do not dump comments\n"));[m
printf(_(" --no-publications do not dump publications\n"));[m
[36m@@ -1290,11 +1309,13 @@[m [mexpand_schema_name_patterns(Archive *fout,[m
/*[m
* Find the OIDs of all tables matching the given list of patterns,[m
* and append them to the given OID list.[m
[32m+[m[32m * If match_data is set, then each pattern is first split on the ':' character,[m
[32m+[m[32m * and the portion after the colon is appended to the SimpleOidList extra data.[m
*/[m
static void[m
expand_table_name_patterns(Archive *fout,[m
SimpleStringList *patterns, SimpleOidList *oids,[m
[31m- bool strict_names)[m
[32m+[m [32m bool strict_names, bool match_data)[m
{[m
PQExpBuffer query;[m
PGresult *res;[m
[36m@@ -1313,6 +1334,17 @@[m [mexpand_table_name_patterns(Archive *fout,[m
[m
for (cell = patterns->head; cell; cell = cell->next)[m
{[m
[32m+[m [32m/* When match_data is set, split the pattern on the ':' chararcter,[m
[32m+[m [32m * and treat the second-half as extra data to append to the list.[m
[32m+[m [32m */[m
[32m+[m [32mchar *extra_data = NULL;[m
[32m+[m [32mif (match_data) {[m
[32m+[m [32mchar *colon_char = strchr(cell->val, ':');[m
[32m+[m [32mif (colon_char) {[m
[32m+[m [32m*colon_char = '\0'; /* overwrite the colon, terminating the string before it */[m
[32m+[m [32mextra_data = colon_char+1; /* use remaining portion of the string as extra data */[m
[32m+[m [32m}[m
[32m+[m [32m}[m
/*[m
* Query must remain ABSOLUTELY devoid of unqualified names. This[m
* would be unnecessary given a pg_table_is_visible() variant taking a[m
[36m@@ -1341,7 +1373,7 @@[m [mexpand_table_name_patterns(Archive *fout,[m
[m
for (i = 0; i < PQntuples(res); i++)[m
{[m
[31m- simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));[m
[32m+[m [32msimple_oid_list_append2(oids, atooid(PQgetvalue(res, i, 0)), extra_data);[m
}[m
[m
PQclear(res);[m
[36m@@ -2281,7 +2313,29 @@[m [mmakeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)[m
tdinfo->dobj.namespace = tbinfo->dobj.namespace;[m
tdinfo->tdtable = tbinfo;[m
tdinfo->oids = oids;[m
[31m- tdinfo->filtercond = NULL; /* might get set later */[m
[32m+[m [32mtdinfo->filtercond = NULL;[m
[32m+[m
[32m+[m [32m/*[m
[32m+[m [32m * --include-table-data-where=<table_name>:<filter_clause> may be provided for this table.[m
[32m+[m [32m * If provided, filter_clause will be something like "foo < 5".[m
[32m+[m [32m * Prepend "WHERE (" and append ")" to produce "WHERE (foo < 5)" as filtercond.[m
[32m+[m [32m */[m
[32m+[m [32mchar *filter_clause = NULL;[m
[32m+[m [32mif (simple_oid_list_member2(&tabledata_include_where_oids,[m
[32m+[m [32mtbinfo->dobj.catId.oid,[m
[32m+[m [32m(void**) &filter_clause))[m
[32m+[m [32m{[m
[32m+[m [32mif (filter_clause) {[m
[32m+[m [32mchar *where_clause = pg_malloc(strlen(filter_clause) + 8 + 1);[m
[32m+[m [32mstrcpy(where_clause, "WHERE (");[m
[32m+[m [32mstrcat(where_clause, filter_clause);[m
[32m+[m [32mstrcat(where_clause, ")");[m
[32m+[m [32mtdinfo->filtercond = where_clause;[m
[32m+[m [32m} else {[m
[32m+[m [32mexit_horribly(NULL, "invalid pattern provided for --include-table-data-where\n");[m
[32m+[m [32m}[m
[32m+[m [32m}[m
[32m+[m
addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);[m
[m
tbinfo->dataObj = tdinfo;[m
[1mdiff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c[m
[1mindex ef94b34..726b54f 100644[m
[1m--- a/src/fe_utils/simple_list.c[m
[1m+++ b/src/fe_utils/simple_list.c[m
[36m@@ -25,11 +25,30 @@[m
void[m
simple_oid_list_append(SimpleOidList *list, Oid val)[m
{[m
[32m+[m [32msimple_oid_list_append2(list, val, NULL);[m
[32m+[m[32m}[m
[32m+[m
[32m+[m[32m/*[m
[32m+[m[32m * Is OID present in the list?[m
[32m+[m[32m */[m
[32m+[m[32mbool[m
[32m+[m[32msimple_oid_list_member(SimpleOidList *list, Oid val)[m
[32m+[m[32m{[m
[32m+[m [32mreturn simple_oid_list_member2(list, val, NULL);[m
[32m+[m[32m}[m
[32m+[m
[32m+[m[32m/*[m
[32m+[m[32m * Append an OID to the list, along with extra pointer-sized data.[m
[32m+[m[32m */[m
[32m+[m[32mvoid[m
[32m+[m[32msimple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data)[m
[32m+[m[32m{[m
SimpleOidListCell *cell;[m
[m
cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));[m
cell->next = NULL;[m
cell->val = val;[m
[32m+[m [32mcell->extra_data = extra_data;[m
[m
if (list->tail)[m
list->tail->next = cell;[m
[36m@@ -40,16 +59,20 @@[m [msimple_oid_list_append(SimpleOidList *list, Oid val)[m
[m
/*[m
* Is OID present in the list?[m
[32m+[m[32m * Also return extra pointer-sized data by setting extra_data paramter[m
*/[m
bool[m
[31m-simple_oid_list_member(SimpleOidList *list, Oid val)[m
[32m+[m[32msimple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)[m
{[m
SimpleOidListCell *cell;[m
[m
for (cell = list->head; cell; cell = cell->next)[m
{[m
[31m- if (cell->val == val)[m
[32m+[m [32mif (cell->val == val) {[m
[32m+[m [32mif (extra_data)[m
[32m+[m [32m*extra_data = cell->extra_data;[m
return true;[m
[32m+[m [32m}[m
}[m
return false;[m
}[m
[1mdiff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h[m
[1mindex 9785489..40bc962 100644[m
[1m--- a/src/include/fe_utils/simple_list.h[m
[1m+++ b/src/include/fe_utils/simple_list.h[m
[36m@@ -21,6 +21,7 @@[m [mtypedef struct SimpleOidListCell[m
{[m
struct SimpleOidListCell *next;[m
Oid val;[m
[32m+[m [32mvoid *extra_data;[m
} SimpleOidListCell;[m
[m
typedef struct SimpleOidList[m
[36m@@ -47,6 +48,9 @@[m [mtypedef struct SimpleStringList[m
extern void simple_oid_list_append(SimpleOidList *list, Oid val);[m
extern bool simple_oid_list_member(SimpleOidList *list, Oid val);[m
[m
[32m+[m[32mextern void simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data);[m
[32m+[m[32mextern bool simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data);[m
[32m+[m
extern void simple_string_list_append(SimpleStringList *list, const char *val);[m
extern bool simple_string_list_member(SimpleStringList *list, const char *val);[m
[m
Greetings,
* Carter Thaxton (carter.thaxton@gmail.com) wrote:
Many times I've wanted to export a subset of a database, using some sort of
row filter condition on some of the large tables. E.g. copying a
production database to a staging environment, but with some time series
data only from the past month.We have the existing options:
--include-table=table (and its -t synonym)
--exclude-table=table
--exclude-table-data=tableI propose a new option:
--include-table-data-where=table:filter_clauseOne would use this option as follows:
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'" database_nameThe filter_clause is used as the contents of a WHERE clause when querying
the data to generate the COPY statement produced by pg_dump.
I've wanted something similar to this in the past as well, and, as
you've seen, we have some support for this kind of thing in pg_dump
already and what you're doing is exposing that.
I've prepared a proposed patch for this, which is attached. The code
changes are rather straightforward. I did have to add the ability to carry
around an extra pointer-sized object to the simple_list implementation, in
order to allow the filter clause to be associated to the matching oids of
the table pattern. It seemed the best way to augment the existing
simple_list implementation, but change as little as possible elsewhere in
the codebase. (Note that SimpleOidList is actually only used by pg_dump).Feel free to review and propose any amendments.
I've only taken a quick look but I don't see any regression tests, for
starters, and it's not clear if this can be passed multiple times for
one pg_dump run (I'd certainly hope that it could be...).
Also, if you haven't already, this should be registered on the
commitfest app, so we don't lose track of it.
Thanks!
Stephen
Hello,
I've only taken a quick look but I don't see any regression tests, for
starters, and it's not clear if this can be passed multiple times for
one pg_dump run (I'd certainly hope that it could be...).
Yes, this will absolutely accept multiple options for one run, which is how
I'd imagine it would typically be used.
In fact, for each table_pattern:filter_clause you provide as an option, it
will apply a corresponding WHERE clause for *every* table that matches the
table_pattern.
So if you happened to use a wildcard in the table_pattern, you could
actually end up with multiple tables filtered by the same WHERE clause.
For example:
pg_dump --include-table-data-where="table_*:created_at >= '2018-05-01'"
--include-table-data-where="other_table:id < 100" db_name
This will filter every table named "table_*", e.g. ["table_0", "table_1",
"table_2", "table_associated"], each with "WHERE created_at >=
'2018-05-01'", and it will also filter "other_table" with "WHERE id < 100".
Not sure how useful the wildcard feature is, but it matches the behavior
of the other pg_dump options that specify tables, and came along for free
by reusing that implementation.
Also, if you haven't already, this should be registered on the
commitfest app, so we don't lose track of it.
Import Notes
Reply to msg id not found: CAGiT_HOE4QvHa4FCXQSk+kRjq+rCvUF6Lb1gxmVryXjN+RsA@mail.gmail.com
On Tue, May 22, 2018 at 4:05 AM, Stephen Frost <sfrost@snowman.net> wrote:
* Carter Thaxton (carter.thaxton@gmail.com) wrote:
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'" database_nameI've wanted something similar to this in the past as well, and, as
you've seen, we have some support for this kind of thing in pg_dump
already and what you're doing is exposing that.
+1
I've prepared a proposed patch for this, which is attached. The code
changes are rather straightforward. I did have to add the ability to carry
around an extra pointer-sized object to the simple_list implementation, in
order to allow the filter clause to be associated to the matching oids of
the table pattern. It seemed the best way to augment the existing
simple_list implementation, but change as little as possible elsewhere in
the codebase. (Note that SimpleOidList is actually only used by pg_dump).Feel free to review and propose any amendments.
I've only taken a quick look but I don't see any regression tests, for
starters, and it's not clear if this can be passed multiple times for
one pg_dump run (I'd certainly hope that it could be...).Also, if you haven't already, this should be registered on the
commitfest app, so we don't lose track of it.
Thanks for doing that. Unfortunately the patch seems to be corrupted
in some way, maybe ANSI control characters or something... perhaps you
set colour.ui = always in your git config, instead of auto? You might
also consider using git format-patch so you can include a brief commit
message that explains the feature.
--
Thomas Munro
http://www.enterprisedb.com
2018-05-20 20:48 GMT-03:00 Carter Thaxton <carter.thaxton@gmail.com>:
Many times I've wanted to export a subset of a database, using some sort of
row filter condition on some of the large tables. E.g. copying a production
database to a staging environment, but with some time series data only from
the past month.
How would you handle foreign keys? It seems easier to produce a dump
that won't restore.
We have the existing options:
--include-table=table (and its -t synonym)
--exclude-table=table
--exclude-table-data=tableI propose a new option:
--include-table-data-where=table:filter_clause
I remembered an old thread [1]/messages/by-id/1212299813.17810.17.camel@ubuntu. At that time pg_dump was not so
decoupled from the backend. We are far from being decoupled in a way
that someone can write his own pg_dump using only calls from a
library. I'm not sure pg_dump is the right place to add another ETL
parameter. We already have too much parameters that could break a
restore (flexibility is always welcome but too much is not so good).
One would use this option as follows:
pg_dump --include-table-data-where=largetable:"created_at >= '2018-05-01'"
database_name
How would you check that that expression is correct? Every parameter
could quote its value. It means that your parameter have to escape the
quote in '2018-05-01'. Another problem is that your spec does not show
us how you would handle tables like Foo.Bar or "foo:bar" (colon have
to be escaped)?
The filter_clause is used as the contents of a WHERE clause when querying
the data to generate the COPY statement produced by pg_dump.
You are forgetting about --inserts parameter. Could I use
--include-table-data-where and --inserts?
[1]: /messages/by-id/1212299813.17810.17.camel@ubuntu
--
Euler Taveira Timbira -
http://www.timbira.com.br/
PostgreSQL: Consultoria, Desenvolvimento, Suporte 24x7 e Treinamento
How would you handle foreign keys? It seems easier to produce a dump
that won't restore.
This proposal will not attempt to be smart about foreign keys or anything
like that. I don't believe that would even be expected.
We have the existing options:
--include-table=table (and its -t synonym)
--exclude-table=table
--exclude-table-data=tableI propose a new option:
--include-table-data-where=table:filter_clauseI remembered an old thread [1]. At that time pg_dump was not so
decoupled from the backend. We are far from being decoupled in a way
that someone can write his own pg_dump using only calls from a
library. I'm not sure pg_dump is the right place to add another ETL
parameter. We already have too much parameters that could break a
restore (flexibility is always welcome but too much is not so good).
In general, I agree with your sentiment that we don't want too much
flexibility in this tool. However, this just seems like a very obvious
missing feature to me. I was frankly surprised that pg_dump didn't already
have it.
I've designed this feature so that it behaves like a more flexible version
between --exclude-table-data and --include-table. Instead of dumping the
schema and zero rows, or the schema and all of the rows, it dumps the
schema and some specific rows.
Providing "--include-table-data-where=table:false" behaves exactly like
--exclude-table-data, and "--include-table-data-where=table:true" behaves
exactly like --include-table.
It does no more or less to prevent a restore. Given that
--exclude-table-data already exists, this seems to introduce no new issues
with restore.
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'"
database_name
How would you check that that expression is correct?
The patch as already provided produces an error message and appropriate
exit code during the dump process, presenting the invalid SQL that is
produced as part of the WHERE clause.
I could see some value in refactoring it to provide error messages earlier
in the process, but it's actually not bad as is.
Every parameter could quote its value. It means that your parameter have to
escape the
quote in '2018-05-01'.
I don't understand. The double quotes in my example are bash shell
quotes. There is no special quote parsing in this patch. The single
quotes are part of the WHERE clause.
Note that pg_dump already uses getopt_long, so it's not required to use the
= symbol to separate option from its associated value. So, it would also
be fine to call as follows:
pg_dump --include-table-data-where "largetable:created_at >=
'2018-05-01'" database_name
Another problem is that your spec does not show
us how you would handle tables like Foo.Bar or "foo:bar" (colon have
to be escaped)?
Using a dot to separate the schema works just fine. My proposal uses the
same mechanism as --include-table, --exclude-table, and
--exclude-table-data. In fact, it even supports wildcards in those
patterns.
Your point about a colon in the table name is interesting. In all my years
of working with PostgreSQL and other databases, I've never encountered a
table name that contained a colon. Perhaps an escape character, like \:
could work. Is there another separator character you would suggest, which
is illegal in table names, but also intuitive as a separator? Maybe a
comma?
The filter_clause is used as the contents of a WHERE clause when querying
the data to generate the COPY statement produced by pg_dump.You are forgetting about --inserts parameter. Could I use
--include-table-data-where and --inserts?
Yes, the --inserts parameter works just fine. Perhaps I should have said
"the COPY statement or INSERT statements".
Ah yes, thanks. I did in fact have colors enabled.
I've attached a new patch generated by `git format-patch`. Hopefully
that's correct.
On Mon, May 21, 2018 at 4:00 PM, Thomas Munro <thomas.munro@enterprisedb.com
Show quoted text
wrote:
On Tue, May 22, 2018 at 4:05 AM, Stephen Frost <sfrost@snowman.net> wrote:
* Carter Thaxton (carter.thaxton@gmail.com) wrote:
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'" database_nameI've wanted something similar to this in the past as well, and, as
you've seen, we have some support for this kind of thing in pg_dump
already and what you're doing is exposing that.+1
I've prepared a proposed patch for this, which is attached. The code
changes are rather straightforward. I did have to add the ability tocarry
around an extra pointer-sized object to the simple_list implementation,
in
order to allow the filter clause to be associated to the matching oids
of
the table pattern. It seemed the best way to augment the existing
simple_list implementation, but change as little as possible elsewherein
the codebase. (Note that SimpleOidList is actually only used by
pg_dump).
Feel free to review and propose any amendments.
I've only taken a quick look but I don't see any regression tests, for
starters, and it's not clear if this can be passed multiple times for
one pg_dump run (I'd certainly hope that it could be...).Also, if you haven't already, this should be registered on the
commitfest app, so we don't lose track of it.Thanks for doing that. Unfortunately the patch seems to be corrupted
in some way, maybe ANSI control characters or something... perhaps you
set colour.ui = always in your git config, instead of auto? You might
also consider using git format-patch so you can include a brief commit
message that explains the feature.--
Thomas Munro
http://www.enterprisedb.com
Attachments:
pgdump-include-table-data-where-v2.patchapplication/octet-stream; name=pgdump-include-table-data-where-v2.patchDownload
From 7843c6d4bda8397e8d21281018285f39ac188ea1 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --include-table-data-where option to pg_dump
---
doc/src/sgml/ref/pg_dump.sgml | 22 ++++++++++++
src/bin/pg_dump/pg_dump.c | 68 ++++++++++++++++++++++++++++++++++----
src/fe_utils/simple_list.c | 27 +++++++++++++--
src/include/fe_utils/simple_list.h | 4 +++
4 files changed, 112 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 50809b4..e02359e 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -771,6 +771,28 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>--include-table-data-where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+ <listitem>
+ <para>
+ When dumping data for any tables matching the <replaceable
+ class="parameter">table</replaceable> pattern, only include rows
+ that meet the <replaceable
+ class="parameter">filter_clause</replaceable> condition.
+ This option is useful when you want to dump only a subset of a particular table.
+ The table pattern is interpreted according to the same rules as for <option>-t</option>.
+ <option>--include-table-data</option> can be given more than once to
+ provide different filters for multiple tables.
+ Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+ If necessary, use quotes in your shell to provide an argument that contains spaces.
+ E.g. --include-table-data-where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+ </para>
+ <para>
+ To exclude data for all tables in the database, see <option>--schema-only</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>--inserts</option></term>
<listitem>
<para>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d6ceb72..e2cfea1 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -119,6 +119,8 @@ static SimpleOidList schema_exclude_oids = {NULL, NULL};
static SimpleStringList table_include_patterns = {NULL, NULL};
static SimpleOidList table_include_oids = {NULL, NULL};
+static SimpleStringList tabledata_include_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_include_where_oids = {NULL, NULL};
static SimpleStringList table_exclude_patterns = {NULL, NULL};
static SimpleOidList table_exclude_oids = {NULL, NULL};
static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
@@ -154,7 +156,8 @@ static void expand_schema_name_patterns(Archive *fout,
static void expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns,
SimpleOidList *oids,
- bool strict_names);
+ bool strict_names,
+ bool match_data);
static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -360,6 +363,7 @@ main(int argc, char **argv)
{"enable-row-security", no_argument, &dopt.enable_row_security, 1},
{"exclude-table-data", required_argument, NULL, 4},
{"if-exists", no_argument, &dopt.if_exists, 1},
+ {"include-table-data-where", required_argument, NULL, 8},
{"inserts", no_argument, &dopt.dump_inserts, 1},
{"lock-wait-timeout", required_argument, NULL, 2},
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
@@ -562,6 +566,10 @@ main(int argc, char **argv)
dosync = false;
break;
+ case 8: /* include table(s) data WHERE clause */
+ simple_string_list_append(&tabledata_include_where_patterns, optarg);
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit_nicely(1);
@@ -761,17 +769,26 @@ main(int argc, char **argv)
{
expand_table_name_patterns(fout, &table_include_patterns,
&table_include_oids,
- strict_names);
+ strict_names, false);
if (table_include_oids.head == NULL)
exit_horribly(NULL, "no matching tables were found\n");
}
+ if (tabledata_include_where_patterns.head != NULL)
+ {
+ expand_table_name_patterns(fout, &tabledata_include_where_patterns,
+ &tabledata_include_where_oids,
+ true, /* Always use strict names for WHERE pattern */
+ true); /* Match extra data after ':' character in each argument */
+ if (tabledata_include_where_oids.head == NULL)
+ exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+ }
expand_table_name_patterns(fout, &table_exclude_patterns,
&table_exclude_oids,
- false);
+ false, false);
expand_table_name_patterns(fout, &tabledata_exclude_patterns,
&tabledata_exclude_oids,
- false);
+ false, false);
/* non-matching exclusion patterns aren't an error */
@@ -980,6 +997,8 @@ help(const char *progname)
" access to)\n"));
printf(_(" --exclude-table-data=TABLE do NOT dump data for the named table(s)\n"));
printf(_(" --if-exists use IF EXISTS when dropping objects\n"));
+ printf(_(" --include-table-data-where=TABLE:WHERE_CLAUSE\n"));
+ printf(_(" only dump selected rows for the given table(s)\n"));
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
printf(_(" --no-comments do not dump comments\n"));
printf(_(" --no-publications do not dump publications\n"));
@@ -1290,11 +1309,13 @@ expand_schema_name_patterns(Archive *fout,
/*
* Find the OIDs of all tables matching the given list of patterns,
* and append them to the given OID list.
+ * If match_data is set, then each pattern is first split on the ':' character,
+ * and the portion after the colon is appended to the SimpleOidList extra data.
*/
static void
expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns, SimpleOidList *oids,
- bool strict_names)
+ bool strict_names, bool match_data)
{
PQExpBuffer query;
PGresult *res;
@@ -1313,6 +1334,17 @@ expand_table_name_patterns(Archive *fout,
for (cell = patterns->head; cell; cell = cell->next)
{
+ /* When match_data is set, split the pattern on the ':' chararcter,
+ * and treat the second-half as extra data to append to the list.
+ */
+ char *extra_data = NULL;
+ if (match_data) {
+ char *colon_char = strchr(cell->val, ':');
+ if (colon_char) {
+ *colon_char = '\0'; /* overwrite the colon, terminating the string before it */
+ extra_data = colon_char+1; /* use remaining portion of the string as extra data */
+ }
+ }
/*
* Query must remain ABSOLUTELY devoid of unqualified names. This
* would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1341,7 +1373,7 @@ expand_table_name_patterns(Archive *fout,
for (i = 0; i < PQntuples(res); i++)
{
- simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+ simple_oid_list_append2(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
}
PQclear(res);
@@ -2281,7 +2313,29 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
tdinfo->dobj.namespace = tbinfo->dobj.namespace;
tdinfo->tdtable = tbinfo;
tdinfo->oids = oids;
- tdinfo->filtercond = NULL; /* might get set later */
+ tdinfo->filtercond = NULL;
+
+ /*
+ * --include-table-data-where=<table_name>:<filter_clause> may be provided for this table.
+ * If provided, filter_clause will be something like "foo < 5".
+ * Prepend "WHERE (" and append ")" to produce "WHERE (foo < 5)" as filtercond.
+ */
+ char *filter_clause = NULL;
+ if (simple_oid_list_member2(&tabledata_include_where_oids,
+ tbinfo->dobj.catId.oid,
+ (void**) &filter_clause))
+ {
+ if (filter_clause) {
+ char *where_clause = pg_malloc(strlen(filter_clause) + 8 + 1);
+ strcpy(where_clause, "WHERE (");
+ strcat(where_clause, filter_clause);
+ strcat(where_clause, ")");
+ tdinfo->filtercond = where_clause;
+ } else {
+ exit_horribly(NULL, "invalid pattern provided for --include-table-data-where\n");
+ }
+ }
+
addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index ef94b34..726b54f 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -25,11 +25,30 @@
void
simple_oid_list_append(SimpleOidList *list, Oid val)
{
+ simple_oid_list_append2(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+ return simple_oid_list_member2(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data)
+{
SimpleOidListCell *cell;
cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
cell->next = NULL;
cell->val = val;
+ cell->extra_data = extra_data;
if (list->tail)
list->tail->next = cell;
@@ -40,16 +59,20 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
/*
* Is OID present in the list?
+ * Also return extra pointer-sized data by setting extra_data paramter
*/
bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)
{
SimpleOidListCell *cell;
for (cell = list->head; cell; cell = cell->next)
{
- if (cell->val == val)
+ if (cell->val == val) {
+ if (extra_data)
+ *extra_data = cell->extra_data;
return true;
+ }
}
return false;
}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 9785489..40bc962 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
{
struct SimpleOidListCell *next;
Oid val;
+ void *extra_data;
} SimpleOidListCell;
typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
extern void simple_oid_list_append(SimpleOidList *list, Oid val);
extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
+extern void simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data);
+
extern void simple_string_list_append(SimpleStringList *list, const char *val);
extern bool simple_string_list_member(SimpleStringList *list, const char *val);
--
2.10.2
On Wed, May 23, 2018 at 5:18 PM, Carter Thaxton
<carter.thaxton@gmail.com> wrote:
Ah yes, thanks. I did in fact have colors enabled.
I've attached a new patch generated by `git format-patch`. Hopefully that's
correct.
pg_dump.c:2323:2: warning: ISO C90 forbids mixed declarations and code
[-Wdeclaration-after-statement]
char *filter_clause = NULL;
^
You need to declare this variable at the top of its scope. If you're
using GCC or Clang you might consider building with COPT=-Werror so
that any compiler warnings will stop the build from succeeding.
This doesn't build on Windows[1]https://ci.appveyor.com/project/postgresql-cfbot/postgresql/build/1.0.311, probably for the same reason.
/*
* Is OID present in the list?
+ * Also return extra pointer-sized data by setting extra_data paramter
*/
bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)
I feel like that isn't in the spirit of Lisp "member". It's now a
kind of association list. I wonder if we are really constrained to
use the cave-man facilities in fe_utils anyway. Though I suppose this
list is never going to be super large so maybe the data structure
doesn't matter too much (famous last words).
+ char *where_clause = pg_malloc(strlen(filter_clause) + 8 + 1);
+ strcpy(where_clause, "WHERE (");
+ strcat(where_clause, filter_clause);
+ strcat(where_clause, ")");
pg_dump.c seems to be allowed to use psprintf() which'd be less
fragile than the above code.
+ /* When match_data is set, split the pattern on the ':' chararcter,
typo
+ * Also return extra pointer-sized data by setting extra_data paramter
typo
[1]: https://ci.appveyor.com/project/postgresql-cfbot/postgresql/build/1.0.311
--
Thomas Munro
http://www.enterprisedb.com
pg_dump.c:2323:2: warning: ISO C90 forbids mixed declarations and code
[-Wdeclaration-after-statement]
char *filter_clause = NULL;
^You need to declare this variable at the top of its scope. If you're
using GCC or Clang you might consider building with COPT=-Werror so
that any compiler warnings will stop the build from succeeding.This doesn't build on Windows[1], probably for the same reason.
Done. And thanks for the tip about COPT=-Werror
/* * Is OID present in the list? + * Also return extra pointer-sized data by setting extra_data paramter */ bool -simple_oid_list_member(SimpleOidList *list, Oid val) +simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)I feel like that isn't in the spirit of Lisp "member". It's now a
kind of association list. I wonder if we are really constrained to
use the cave-man facilities in fe_utils anyway. Though I suppose this
list is never going to be super large so maybe the data structure
doesn't matter too much (famous last words).
Yeah, I'm just trying to fit into the surrounding code as much as
possible. If you have a specific recommendation, I'm all ears.
SimpleOidList is only used by pg_dump, so if we want to rename or refactor
this data structure, it won't have much widespread impact.
And you're right that the list is not going to be particularly large.
Consider that it's already a simple linked-list, and not some more complex
hashtable, for the use cases that it already covers in pg_dump. For all of
these uses, it will only be as large as the number of options provided on
the command-line.
+ char *where_clause = pg_malloc(strlen(filter_clause) + 8 + 1); + strcpy(where_clause, "WHERE ("); + strcat(where_clause, filter_clause); + strcat(where_clause, ")");pg_dump.c seems to be allowed to use psprintf() which'd be less
fragile than the above code.
Done. Didn't realize psprintf() was available here.
typo
And fixed typos.
Thanks for the review!
Attachments:
pgdump-include-table-data-where-v3.patchapplication/octet-stream; name=pgdump-include-table-data-where-v3.patchDownload
From 6cff0486e50f11ede3bdfde76c0f0c7414b5b051 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --include-table-data-where option to pg_dump
---
doc/src/sgml/ref/pg_dump.sgml | 22 +++++++++++++
src/bin/pg_dump/pg_dump.c | 64 +++++++++++++++++++++++++++++++++-----
src/fe_utils/simple_list.c | 27 ++++++++++++++--
src/include/fe_utils/simple_list.h | 4 +++
4 files changed, 108 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 50809b4..e02359e 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -771,6 +771,28 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>--include-table-data-where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+ <listitem>
+ <para>
+ When dumping data for any tables matching the <replaceable
+ class="parameter">table</replaceable> pattern, only include rows
+ that meet the <replaceable
+ class="parameter">filter_clause</replaceable> condition.
+ This option is useful when you want to dump only a subset of a particular table.
+ The table pattern is interpreted according to the same rules as for <option>-t</option>.
+ <option>--include-table-data</option> can be given more than once to
+ provide different filters for multiple tables.
+ Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+ If necessary, use quotes in your shell to provide an argument that contains spaces.
+ E.g. --include-table-data-where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+ </para>
+ <para>
+ To exclude data for all tables in the database, see <option>--schema-only</option>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>--inserts</option></term>
<listitem>
<para>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d6ceb72..f8e62c3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -119,6 +119,8 @@ static SimpleOidList schema_exclude_oids = {NULL, NULL};
static SimpleStringList table_include_patterns = {NULL, NULL};
static SimpleOidList table_include_oids = {NULL, NULL};
+static SimpleStringList tabledata_include_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_include_where_oids = {NULL, NULL};
static SimpleStringList table_exclude_patterns = {NULL, NULL};
static SimpleOidList table_exclude_oids = {NULL, NULL};
static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
@@ -154,7 +156,8 @@ static void expand_schema_name_patterns(Archive *fout,
static void expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns,
SimpleOidList *oids,
- bool strict_names);
+ bool strict_names,
+ bool match_data);
static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -360,6 +363,7 @@ main(int argc, char **argv)
{"enable-row-security", no_argument, &dopt.enable_row_security, 1},
{"exclude-table-data", required_argument, NULL, 4},
{"if-exists", no_argument, &dopt.if_exists, 1},
+ {"include-table-data-where", required_argument, NULL, 8},
{"inserts", no_argument, &dopt.dump_inserts, 1},
{"lock-wait-timeout", required_argument, NULL, 2},
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
@@ -562,6 +566,10 @@ main(int argc, char **argv)
dosync = false;
break;
+ case 8: /* include table(s) data WHERE clause */
+ simple_string_list_append(&tabledata_include_where_patterns, optarg);
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit_nicely(1);
@@ -761,17 +769,26 @@ main(int argc, char **argv)
{
expand_table_name_patterns(fout, &table_include_patterns,
&table_include_oids,
- strict_names);
+ strict_names, false);
if (table_include_oids.head == NULL)
exit_horribly(NULL, "no matching tables were found\n");
}
+ if (tabledata_include_where_patterns.head != NULL)
+ {
+ expand_table_name_patterns(fout, &tabledata_include_where_patterns,
+ &tabledata_include_where_oids,
+ true, /* Always use strict names for WHERE pattern */
+ true); /* Match extra data after ':' character in each argument */
+ if (tabledata_include_where_oids.head == NULL)
+ exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+ }
expand_table_name_patterns(fout, &table_exclude_patterns,
&table_exclude_oids,
- false);
+ false, false);
expand_table_name_patterns(fout, &tabledata_exclude_patterns,
&tabledata_exclude_oids,
- false);
+ false, false);
/* non-matching exclusion patterns aren't an error */
@@ -980,6 +997,8 @@ help(const char *progname)
" access to)\n"));
printf(_(" --exclude-table-data=TABLE do NOT dump data for the named table(s)\n"));
printf(_(" --if-exists use IF EXISTS when dropping objects\n"));
+ printf(_(" --include-table-data-where=TABLE:WHERE_CLAUSE\n"));
+ printf(_(" only dump selected rows for the given table(s)\n"));
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
printf(_(" --no-comments do not dump comments\n"));
printf(_(" --no-publications do not dump publications\n"));
@@ -1290,11 +1309,13 @@ expand_schema_name_patterns(Archive *fout,
/*
* Find the OIDs of all tables matching the given list of patterns,
* and append them to the given OID list.
+ * If match_data is set, then each pattern is first split on the ':' character,
+ * and the portion after the colon is appended to the SimpleOidList extra data.
*/
static void
expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns, SimpleOidList *oids,
- bool strict_names)
+ bool strict_names, bool match_data)
{
PQExpBuffer query;
PGresult *res;
@@ -1313,6 +1334,17 @@ expand_table_name_patterns(Archive *fout,
for (cell = patterns->head; cell; cell = cell->next)
{
+ /* When match_data is set, split the pattern on the ':' character,
+ * and treat the second-half as extra data to append to the list.
+ */
+ char *extra_data = NULL;
+ if (match_data) {
+ char *colon_char = strchr(cell->val, ':');
+ if (colon_char) {
+ *colon_char = '\0'; /* overwrite the colon, terminating the string before it */
+ extra_data = colon_char+1; /* use remaining portion of the string as extra data */
+ }
+ }
/*
* Query must remain ABSOLUTELY devoid of unqualified names. This
* would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1341,7 +1373,7 @@ expand_table_name_patterns(Archive *fout,
for (i = 0; i < PQntuples(res); i++)
{
- simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+ simple_oid_list_append2(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
}
PQclear(res);
@@ -2232,6 +2264,7 @@ static void
makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
{
TableDataInfo *tdinfo;
+ char *filter_clause;
/*
* Nothing to do if we already decided to dump the table. This will
@@ -2281,7 +2314,24 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
tdinfo->dobj.namespace = tbinfo->dobj.namespace;
tdinfo->tdtable = tbinfo;
tdinfo->oids = oids;
- tdinfo->filtercond = NULL; /* might get set later */
+ tdinfo->filtercond = NULL;
+
+ /*
+ * --include-table-data-where=<table_name>:<filter_clause> may be provided for this table.
+ * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+ */
+ filter_clause = NULL;
+ if (simple_oid_list_member2(&tabledata_include_where_oids,
+ tbinfo->dobj.catId.oid,
+ (void**) &filter_clause))
+ {
+ if (filter_clause) {
+ tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+ } else {
+ exit_horribly(NULL, "invalid pattern provided for --include-table-data-where\n");
+ }
+ }
+
addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index ef94b34..b0fded2 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -25,11 +25,30 @@
void
simple_oid_list_append(SimpleOidList *list, Oid val)
{
+ simple_oid_list_append2(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+ return simple_oid_list_member2(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data)
+{
SimpleOidListCell *cell;
cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
cell->next = NULL;
cell->val = val;
+ cell->extra_data = extra_data;
if (list->tail)
list->tail->next = cell;
@@ -40,16 +59,20 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
/*
* Is OID present in the list?
+ * Also return extra pointer-sized data by setting extra_data parameter
*/
bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)
{
SimpleOidListCell *cell;
for (cell = list->head; cell; cell = cell->next)
{
- if (cell->val == val)
+ if (cell->val == val) {
+ if (extra_data)
+ *extra_data = cell->extra_data;
return true;
+ }
}
return false;
}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 9785489..40bc962 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
{
struct SimpleOidListCell *next;
Oid val;
+ void *extra_data;
} SimpleOidListCell;
typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
extern void simple_oid_list_append(SimpleOidList *list, Oid val);
extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
+extern void simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data);
+
extern void simple_string_list_append(SimpleStringList *list, const char *val);
extern bool simple_string_list_member(SimpleStringList *list, const char *val);
--
2.10.2
After some consideration, I've created a new patch that addresses even more
of the various concerns.
Most notably, the command-line option is shortened to simply --where, which
is much easier to read and understand,
and matches the earlier proposal [1]https://ci.appveyor.com/project/postgresql-cfbot/ postgresql/build/1.0.311.
bool -simple_oid_list_member(SimpleOidList *list, Oid val) +simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)I feel like that isn't in the spirit of Lisp "member". It's now a
kind of association list.
My new patch has this function named simple_oid_list_find_data, to indicate
that it's working with some extra data,
and the corresponding append is called simple_oid_list_append_data.
Another problem is that your spec does not show
us how you would handle tables like Foo.Bar or "foo:bar" (colon have
to be escaped)?
This is now addressed in the v4 patch. I've added a new function to
string_utils,
called findUnquotedChar, which is used to find the colon character in the
command-line argument,
which separates the table name from the filter clause.
For example, if you have a table called "foo:bar", then you would use the
--where option as follows:
pg_dump --where '"foo:bar":created_at >= '2018-05-01'" dbname
If you have a table with a double-quote in the name (gah!), as well as
colons, like: has"quote:and:colons
then this still works, because such quotes in the name are escaped by
doubling the quote char,
e.g. "has""quote:and:colons", and also works with this patch:
pg_dump --where '"has""quote:and:colons":created_at >= '2018-05-1'" dbname
[1]: https://ci.appveyor.com/project/postgresql-cfbot/ postgresql/build/1.0.311
postgresql/build/1.0.311
Attachments:
pgdump-include-table-data-where-v4.patchapplication/octet-stream; name=pgdump-include-table-data-where-v4.patchDownload
From a7474069dad4616639d41d79edf3d844899c5b21 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --where option to pg_dump
---
doc/src/sgml/ref/pg_dump.sgml | 18 ++++++++++
src/bin/pg_dump/pg_dump.c | 65 +++++++++++++++++++++++++++++++++----
src/fe_utils/simple_list.c | 28 ++++++++++++++--
src/fe_utils/string_utils.c | 30 +++++++++++++++++
src/include/fe_utils/simple_list.h | 4 +++
src/include/fe_utils/string_utils.h | 2 ++
6 files changed, 138 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 50809b4..ed850de 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1040,6 +1040,24 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>--where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+ <listitem>
+ <para>
+ When dumping data for any tables matching the <replaceable
+ class="parameter">table</replaceable> pattern, only include rows
+ that meet the <replaceable class="parameter">filter_clause</replaceable> condition.
+ This option is useful when you want to dump only a subset of a particular table.
+ The table pattern is interpreted according to the same rules as for <option>-t</option>.
+ <option>--where</option> can be given more than once to
+ provide different filters for multiple tables.
+ Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+ If necessary, use quotes in your shell to provide an argument that contains spaces.
+ E.g. --where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-?</option></term>
<term><option>--help</option></term>
<listitem>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d6ceb72..3ea5cdf 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -123,6 +123,8 @@ static SimpleStringList table_exclude_patterns = {NULL, NULL};
static SimpleOidList table_exclude_oids = {NULL, NULL};
static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_where_oids = {NULL, NULL};
char g_opaque_type[10]; /* name for the opaque type */
@@ -154,7 +156,8 @@ static void expand_schema_name_patterns(Archive *fout,
static void expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns,
SimpleOidList *oids,
- bool strict_names);
+ bool strict_names,
+ bool match_data);
static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -371,6 +374,7 @@ main(int argc, char **argv)
{"snapshot", required_argument, NULL, 6},
{"strict-names", no_argument, &strict_names, 1},
{"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
+ {"where", required_argument, NULL, 8},
{"no-comments", no_argument, &dopt.no_comments, 1},
{"no-publications", no_argument, &dopt.no_publications, 1},
{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
@@ -562,6 +566,10 @@ main(int argc, char **argv)
dosync = false;
break;
+ case 8: /* table(s) data WHERE clause */
+ simple_string_list_append(&tabledata_where_patterns, optarg);
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit_nicely(1);
@@ -761,17 +769,26 @@ main(int argc, char **argv)
{
expand_table_name_patterns(fout, &table_include_patterns,
&table_include_oids,
- strict_names);
+ strict_names, false);
if (table_include_oids.head == NULL)
exit_horribly(NULL, "no matching tables were found\n");
}
+ if (tabledata_where_patterns.head != NULL)
+ {
+ expand_table_name_patterns(fout, &tabledata_where_patterns,
+ &tabledata_where_oids,
+ true, /* Always use strict names for WHERE pattern */
+ true); /* Match extra data after ':' character in each argument */
+ if (tabledata_where_oids.head == NULL)
+ exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+ }
expand_table_name_patterns(fout, &table_exclude_patterns,
&table_exclude_oids,
- false);
+ false, false);
expand_table_name_patterns(fout, &tabledata_exclude_patterns,
&tabledata_exclude_oids,
- false);
+ false, false);
/* non-matching exclusion patterns aren't an error */
@@ -998,6 +1015,7 @@ help(const char *progname)
printf(_(" --use-set-session-authorization\n"
" use SET SESSION AUTHORIZATION commands instead of\n"
" ALTER OWNER commands to set ownership\n"));
+ printf(_(" --where=TABLE:WHERE_CLAUSE only dump selected rows for the given table(s)\n"));
printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=DBNAME database to dump\n"));
@@ -1290,15 +1308,19 @@ expand_schema_name_patterns(Archive *fout,
/*
* Find the OIDs of all tables matching the given list of patterns,
* and append them to the given OID list.
+ * If match_data is set, then each pattern is first split on the ':' character,
+ * and the portion after the colon is appended to the SimpleOidList extra data.
*/
static void
expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns, SimpleOidList *oids,
- bool strict_names)
+ bool strict_names, bool match_data)
{
PQExpBuffer query;
PGresult *res;
SimpleStringListCell *cell;
+ char *extra_data;
+ char *colon_char;
int i;
if (patterns->head == NULL)
@@ -1313,6 +1335,17 @@ expand_table_name_patterns(Archive *fout,
for (cell = patterns->head; cell; cell = cell->next)
{
+ /* When match_data is set, split the pattern on the first unquoted ':' character,
+ * and treat the second-half as extra data to append to the list.
+ */
+ extra_data = NULL;
+ if (match_data) {
+ colon_char = (char*) findUnquotedChar(cell->val, ':');
+ if (colon_char) {
+ *colon_char = '\0'; /* overwrite the colon, terminating the string before it */
+ extra_data = colon_char+1; /* use remaining portion of the string as extra data */
+ }
+ }
/*
* Query must remain ABSOLUTELY devoid of unqualified names. This
* would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1341,7 +1374,7 @@ expand_table_name_patterns(Archive *fout,
for (i = 0; i < PQntuples(res); i++)
{
- simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+ simple_oid_list_append_data(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
}
PQclear(res);
@@ -2232,6 +2265,7 @@ static void
makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
{
TableDataInfo *tdinfo;
+ char *filter_clause;
/*
* Nothing to do if we already decided to dump the table. This will
@@ -2281,7 +2315,24 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
tdinfo->dobj.namespace = tbinfo->dobj.namespace;
tdinfo->tdtable = tbinfo;
tdinfo->oids = oids;
- tdinfo->filtercond = NULL; /* might get set later */
+ tdinfo->filtercond = NULL;
+
+ /*
+ * --where=<table_name>:<filter_clause> may be provided for this table.
+ * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+ */
+ filter_clause = NULL;
+ if (simple_oid_list_find_data(&tabledata_where_oids,
+ tbinfo->dobj.catId.oid,
+ (void**) &filter_clause))
+ {
+ if (filter_clause) {
+ tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+ } else {
+ exit_horribly(NULL, "invalid pattern provided for --where\n");
+ }
+ }
+
addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index ef94b34..cd7812d 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -25,11 +25,30 @@
void
simple_oid_list_append(SimpleOidList *list, Oid val)
{
+ simple_oid_list_append_data(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+ return simple_oid_list_find_data(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data)
+{
SimpleOidListCell *cell;
cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
cell->next = NULL;
cell->val = val;
+ cell->extra_data = extra_data;
if (list->tail)
list->tail->next = cell;
@@ -40,16 +59,21 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
/*
* Is OID present in the list?
+ * If so, return true, and provide pointer-sized data by setting result of extra_data parameter.
+ * If not, return false.
*/
bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data)
{
SimpleOidListCell *cell;
for (cell = list->head; cell; cell = cell->next)
{
- if (cell->val == val)
+ if (cell->val == val) {
+ if (extra_data)
+ *extra_data = cell->extra_data;
return true;
+ }
}
return false;
}
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index b47a396..0a71bd9 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -1019,3 +1019,33 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
return added_clause;
#undef WHEREAND
}
+
+/*
+ * findUnquotedChar
+ *
+ * Scan a string and return a pointer to the first character that
+ * matches a given character, which isn't in double-quotes.
+ *
+ * For example, searching for a colon inside of:
+ * 'hello"this:is:in:quotes"but:this:is:not'
+ * would return a pointer to this ^ colon. (the one after 'but')
+ *
+ * Returns NULL if not found.
+ *
+ * str: string to scan.
+ * find_char: character to find.
+ */
+const char *
+findUnquotedChar(const char *str, char find_char)
+{
+ const char *p = str;
+ bool inquotes = false;
+
+ while (*p++) {
+ if (*p == '"')
+ inquotes = !inquotes;
+ else if ((*p == find_char) && !inquotes)
+ return p;
+ }
+ return NULL;
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 9785489..95b0a8e 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
{
struct SimpleOidListCell *next;
Oid val;
+ void *extra_data;
} SimpleOidListCell;
typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
extern void simple_oid_list_append(SimpleOidList *list, Oid val);
extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
+extern void simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data);
+
extern void simple_string_list_append(SimpleStringList *list, const char *val);
extern bool simple_string_list_member(SimpleStringList *list, const char *val);
diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
index 9a311e0..ccba60c 100644
--- a/src/include/fe_utils/string_utils.h
+++ b/src/include/fe_utils/string_utils.h
@@ -57,4 +57,6 @@ extern bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf,
const char *schemavar, const char *namevar,
const char *altnamevar, const char *visibilityrule);
+extern const char *findUnquotedChar(const char *str, char find_char);
+
#endif /* STRING_UTILS_H */
--
2.10.2
hey,
i am reviewing this patch
On Thu, May 31, 2018 at 4:49 AM, Carter Thaxton <carter.thaxton@gmail.com>
wrote:
pg_dump --where '"foo:bar":created_at >= '2018-05-01'" dbname
it would be more sqlish if it specified like:
--table=foo --where ="bar created_at >= 2018-05-01"
and i don't like the idea of duplicating the existing --table behavior it
may confuse user
i rather recommend extending it. And when i test it with --table option the
content of dump
file depend on the option specified first.
Regards
Surafel
On Fri, Jun 29, 2018 at 8:09 AM, Surafel Temesgen <surafel3000@gmail.com> wrote:
hey,
i am reviewing this patch
On Thu, May 31, 2018 at 4:49 AM, Carter Thaxton <carter.thaxton@gmail.com>
wrote:pg_dump --where '"foo:bar":created_at >= '2018-05-01'" dbname
it would be more sqlish if it specified like:
--table=foo --where ="bar created_at >= 2018-05-01"
and i don't like the idea of duplicating the existing --table behavior it
may confuse user
i rather recommend extending it. And when i test it with --table option the
content of dump
file depend on the option specified first.
But you can specify multiple tables. You wouldn't want the same WHERE
clause to apply to all of them.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
The whole reason for the colon in the --where option is to indicate which
table the WHERE clause should refer to, so that one can dump less than all
of the rows.
The --table option is totally different. It specifies which tables to dump
at all.
If I provide a --where option, and no --table option, I want the WHERE
clause to apply to the given table, and otherwise dump all tables.
If one supplies a --table option, it won't dump all tables - it will only
dump the one specified. I don't want to have to specify all the tables
with --table, just to use the --where option.
Also, there may be some misunderstanding about "foo:bar" above. That's an
example of using a namespaced table, where "bar" is a table in the
namespace "foo". Normally, assuming your table is named "bar" in the
default namespace, you would just say something like:
pg_dump --where "bar:created_at >= 2018-05-01'"
On Mon, Jul 2, 2018 at 11:27 AM, Robert Haas <robertmhaas@gmail.com> wrote:
Show quoted text
On Fri, Jun 29, 2018 at 8:09 AM, Surafel Temesgen <surafel3000@gmail.com>
wrote:hey,
i am reviewing this patch
On Thu, May 31, 2018 at 4:49 AM, Carter Thaxton <carter.thaxton@gmail.com>
wrote:
pg_dump --where '"foo:bar":created_at >= '2018-05-01'" dbname
it would be more sqlish if it specified like:
--table=foo --where ="bar created_at >= 2018-05-01"
and i don't like the idea of duplicating the existing --table behavior it
may confuse user
i rather recommend extending it. And when i test it with --table optionthe
content of dump
file depend on the option specified first.But you can specify multiple tables. You wouldn't want the same WHERE
clause to apply to all of them.--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On Mon, Jul 02, 2018 at 03:11:46PM -0700, Carter Thaxton wrote:
Also, there may be some misunderstanding about "foo:bar" above. That's an
example of using a namespaced table, where "bar" is a table in the
namespace "foo". Normally, assuming your table is named "bar" in the
default namespace, you would just say something like:pg_dump --where "bar:created_at >= 2018-05-01'"
I am wondering how this works at parsing if the table name, or one of
the columns includes a colon character :)
--
Michael
pg_dump --where "bar:created_at >= 2018-05-01'"
I am wondering how this works at parsing if the table name, or one of
the columns includes a colon character :)
The proposed patch will handle quoted identifiers. E.g. the following will
work just fine:
pg_dump --where 'table:"column:with:colons" = 5'
Note the use of single quotes in the shell, and then double quotes in the
WHERE clause. There are also many other options for quoting in the shell,
of course.
Please don't top-post on the PostgreSQL lists. See <
http://idallen.com/topposting.html>
Sorry. Thanks for the reminder.
On Mon, Jul 2, 2018 at 9:27 PM, Robert Haas <robertmhaas@gmail.com> wrote:
But you can specify multiple tables. You wouldn't want the same WHERE
clause to apply to all of them.also with this new --where option you can specify multiple table using
wildcard and it
try to apply the same where clause to each table. may be its a desirable
feature
because such kind of table can be structurally similar too.
regards
Surafel
On Tue, Jul 3, 2018 at 6:31 AM, Surafel Temesgen <surafel3000@gmail.com> wrote:
On Mon, Jul 2, 2018 at 9:27 PM, Robert Haas <robertmhaas@gmail.com> wrote:
But you can specify multiple tables. You wouldn't want the same WHERE
clause to apply to all of them.also with this new --where option you can specify multiple table using
wildcard and it
try to apply the same where clause to each table. may be its a desirable
feature
because such kind of table can be structurally similar too.
I don't think that's likely to be very useful. I think Carter Thaxton
has the right idea, although using foo:bar to mean foo.bar doesn't
seem like a great plan.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On Tue, Jul 3, 2018 at 1:11 AM, Carter Thaxton <carter.thaxton@gmail.com>
wrote:
The whole reason for the colon in the --where option is to indicate which
table the WHERE clause should refer to, so that one can dump less than all
of the rows.
The --table option is totally different. It specifies which tables to
dump at all.
Thank you for explaining,
I just have one comment . I found the error message generated on incorrect
where clause specification strange for pg_dump. I think query result status
check needed to handle it and generate more friendly error message.
regards
Surafel
On Mon, May 21, 2018 at 6:34 AM Carter Thaxton <carter.thaxton@gmail.com>
wrote:
Many times I've wanted to export a subset of a database, using some sort
of row filter condition on some of the large tables. E.g. copying a
production database to a staging environment, but with some time series
data only from the past month.We have the existing options:
--include-table=table (and its -t synonym)
--exclude-table=table
--exclude-table-data=tableI propose a new option:
--include-table-data-where=table:filter_clauseOne would use this option as follows:
pg_dump --include-table-data-where=largetable:"created_at >=
'2018-05-01'" database_nameThe filter_clause is used as the contents of a WHERE clause when querying
the data to generate the COPY statement produced by pg_dump.I've prepared a proposed patch for this, which is attached. The code
changes are rather straightforward. I did have to add the ability to carry
around an extra pointer-sized object to the simple_list implementation, in
order to allow the filter clause to be associated to the matching oids of
the table pattern. It seemed the best way to augment the existing
simple_list implementation, but change as little as possible elsewhere in
the codebase. (Note that SimpleOidList is actually only used by pg_dump).Feel free to review and propose any amendments.
Why not simply use \copy (select * from largetable where created_at >=
'2018-05-01') to stdout? That is what I’ve always done when I need
something like this and have not found it particularly bothersome but
rather quite powerful. And here you have tons of flexibility because you
can do joins and whatever else.
FWIW. Thanks,
Jeremy
On Thu, Sep 6, 2018 at 8:40 AM, Jeremy Finzel <finzelj@gmail.com> wrote:
Why not simply use \copy (select * from largetable where created_at >=
'2018-05-01') to stdout? That is what I’ve always done when I need
something like this and have not found it particularly bothersome but
rather quite powerful. And here you have tons of flexibility because you
can do joins and whatever else.
Just skimming the thread but I'd have to say being able to leverage
pg_dump's dependency resolution is a major reason for adding features to it
instead sticking to writing psql scripts. This feature in a multi-tenant
situation is something with, I suspect, reasonably wide appeal.
David J.
Just skimming the thread but I'd have to say being able to leverage
pg_dump's dependency resolution is a major reason for adding features to it
instead sticking to writing psql scripts. This feature in a multi-tenant
situation is something with, I suspect, reasonably wide appeal.
That I would agree with if in fact it's true people want that, but that
wasn't how the problem trying to be solved was communicated. From what I
read in the initial examples given, just using psql is more than sufficient
in those cases.
I will grant that copying the structure and data at the same time would be
much easier, however. Because using psql, you need pg_dump to create your
structure then a separate psql script to copy the data.
But again with --data-only examples given, it's so easy to do that with
psql copy I just don't understand the value of the feature unless you
really are saying you require the dependency resolution.
I agree with some of the hesitation of complicating the syntax and allowing
too much customization for what pg_dump is designed for. Really, if you
need more customization, copy gives you that. So I don't personally
consider it a missing feature because both tools have different uses and I
haven't found that any of this disrupts my workflow. FWIW...
Thanks,
Jeremy
Hi,
Unfortunately, current version of this patch has some conflicts, could you
rebase it?
Hi,
On 2018-11-29 13:50:21 +0100, Dmitry Dolgov wrote:
Unfortunately, current version of this patch has some conflicts, could you
rebase it?
Given this patch has not been updated, I'm marking this as returned with feedback.
Greetings,
Andres Freund
Unfortunately, current version of this patch has some conflicts, could
you
rebase it?
Given this patch has not been updated, I'm marking this as returned with
feedback.
Here's a rebased patch that works with the latest master branch. Very
straightforward.
Attachments:
pgdump-include-table-data-where-v5.patchapplication/octet-stream; name=pgdump-include-table-data-where-v5.patchDownload
From 906449afa5b486053a04b311e47d17ceafb6e331 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --where option to pg_dump
---
doc/src/sgml/ref/pg_dump.sgml | 18 +++++++++
src/bin/pg_dump/pg_dump.c | 63 ++++++++++++++++++++++++++---
src/fe_utils/simple_list.c | 28 ++++++++++++-
src/fe_utils/string_utils.c | 30 ++++++++++++++
src/include/fe_utils/simple_list.h | 4 ++
src/include/fe_utils/string_utils.h | 2 +
6 files changed, 137 insertions(+), 8 deletions(-)
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 9e0bb93f08..e82ebbb076 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1050,6 +1050,24 @@ PostgreSQL documentation
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>--where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+ <listitem>
+ <para>
+ When dumping data for any tables matching the <replaceable
+ class="parameter">table</replaceable> pattern, only include rows
+ that meet the <replaceable class="parameter">filter_clause</replaceable> condition.
+ This option is useful when you want to dump only a subset of a particular table.
+ The table pattern is interpreted according to the same rules as for <option>-t</option>.
+ <option>--where</option> can be given more than once to
+ provide different filters for multiple tables.
+ Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+ If necessary, use quotes in your shell to provide an argument that contains spaces.
+ E.g. --where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><option>-?</option></term>
<term><option>--help</option></term>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 2b1a94733b..aa1951ade0 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -124,6 +124,8 @@ static SimpleStringList table_exclude_patterns = {NULL, NULL};
static SimpleOidList table_exclude_oids = {NULL, NULL};
static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_where_oids = {NULL, NULL};
char g_opaque_type[10]; /* name for the opaque type */
@@ -153,7 +155,8 @@ static void expand_schema_name_patterns(Archive *fout,
static void expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns,
SimpleOidList *oids,
- bool strict_names);
+ bool strict_names,
+ bool match_data);
static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -369,6 +372,7 @@ main(int argc, char **argv)
{"snapshot", required_argument, NULL, 6},
{"strict-names", no_argument, &strict_names, 1},
{"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
+ {"where", required_argument, NULL, 8},
{"no-comments", no_argument, &dopt.no_comments, 1},
{"no-publications", no_argument, &dopt.no_publications, 1},
{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
@@ -557,6 +561,10 @@ main(int argc, char **argv)
dosync = false;
break;
+ case 8: /* table(s) data WHERE clause */
+ simple_string_list_append(&tabledata_where_patterns, optarg);
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit_nicely(1);
@@ -752,17 +760,26 @@ main(int argc, char **argv)
{
expand_table_name_patterns(fout, &table_include_patterns,
&table_include_oids,
- strict_names);
+ strict_names, false);
if (table_include_oids.head == NULL)
exit_horribly(NULL, "no matching tables were found\n");
}
+ if (tabledata_where_patterns.head != NULL)
+ {
+ expand_table_name_patterns(fout, &tabledata_where_patterns,
+ &tabledata_where_oids,
+ true, /* Always use strict names for WHERE pattern */
+ true); /* Match extra data after ':' character in each argument */
+ if (tabledata_where_oids.head == NULL)
+ exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+ }
expand_table_name_patterns(fout, &table_exclude_patterns,
&table_exclude_oids,
- false);
+ false, false);
expand_table_name_patterns(fout, &tabledata_exclude_patterns,
&tabledata_exclude_oids,
- false);
+ false, false);
/* non-matching exclusion patterns aren't an error */
@@ -985,6 +1002,7 @@ help(const char *progname)
printf(_(" --use-set-session-authorization\n"
" use SET SESSION AUTHORIZATION commands instead of\n"
" ALTER OWNER commands to set ownership\n"));
+ printf(_(" --where=TABLE:WHERE_CLAUSE only dump selected rows for the given table(s)\n"));
printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=DBNAME database to dump\n"));
@@ -1277,15 +1295,19 @@ expand_schema_name_patterns(Archive *fout,
/*
* Find the OIDs of all tables matching the given list of patterns,
* and append them to the given OID list.
+ * If match_data is set, then each pattern is first split on the ':' character,
+ * and the portion after the colon is appended to the SimpleOidList extra data.
*/
static void
expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns, SimpleOidList *oids,
- bool strict_names)
+ bool strict_names, bool match_data)
{
PQExpBuffer query;
PGresult *res;
SimpleStringListCell *cell;
+ char *extra_data;
+ char *colon_char;
int i;
if (patterns->head == NULL)
@@ -1300,6 +1322,17 @@ expand_table_name_patterns(Archive *fout,
for (cell = patterns->head; cell; cell = cell->next)
{
+ /* When match_data is set, split the pattern on the first unquoted ':' character,
+ * and treat the second-half as extra data to append to the list.
+ */
+ extra_data = NULL;
+ if (match_data) {
+ colon_char = (char*) findUnquotedChar(cell->val, ':');
+ if (colon_char) {
+ *colon_char = '\0'; /* overwrite the colon, terminating the string before it */
+ extra_data = colon_char+1; /* use remaining portion of the string as extra data */
+ }
+ }
/*
* Query must remain ABSOLUTELY devoid of unqualified names. This
* would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1328,7 +1361,7 @@ expand_table_name_patterns(Archive *fout,
for (i = 0; i < PQntuples(res); i++)
{
- simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+ simple_oid_list_append_data(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
}
PQclear(res);
@@ -2228,6 +2261,7 @@ static void
makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
{
TableDataInfo *tdinfo;
+ char *filter_clause;
/*
* Nothing to do if we already decided to dump the table. This will
@@ -2277,6 +2311,23 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
tdinfo->dobj.namespace = tbinfo->dobj.namespace;
tdinfo->tdtable = tbinfo;
tdinfo->filtercond = NULL; /* might get set later */
+
+ /*
+ * --where=<table_name>:<filter_clause> may be provided for this table.
+ * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+ */
+ filter_clause = NULL;
+ if (simple_oid_list_find_data(&tabledata_where_oids,
+ tbinfo->dobj.catId.oid,
+ (void**) &filter_clause))
+ {
+ if (filter_clause) {
+ tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+ } else {
+ exit_horribly(NULL, "invalid pattern provided for --where\n");
+ }
+ }
+
addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index 8d605140a7..49717403e2 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -24,12 +24,31 @@
*/
void
simple_oid_list_append(SimpleOidList *list, Oid val)
+{
+ simple_oid_list_append_data(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+ return simple_oid_list_find_data(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data)
{
SimpleOidListCell *cell;
cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
cell->next = NULL;
cell->val = val;
+ cell->extra_data = extra_data;
if (list->tail)
list->tail->next = cell;
@@ -40,16 +59,21 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
/*
* Is OID present in the list?
+ * If so, return true, and provide pointer-sized data by setting result of extra_data parameter.
+ * If not, return false.
*/
bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data)
{
SimpleOidListCell *cell;
for (cell = list->head; cell; cell = cell->next)
{
- if (cell->val == val)
+ if (cell->val == val) {
+ if (extra_data)
+ *extra_data = cell->extra_data;
return true;
+ }
}
return false;
}
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index 5c1732aabe..a31aec53a9 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -1016,3 +1016,33 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
return added_clause;
#undef WHEREAND
}
+
+/*
+ * findUnquotedChar
+ *
+ * Scan a string and return a pointer to the first character that
+ * matches a given character, which isn't in double-quotes.
+ *
+ * For example, searching for a colon inside of:
+ * 'hello"this:is:in:quotes"but:this:is:not'
+ * would return a pointer to this ^ colon. (the one after 'but')
+ *
+ * Returns NULL if not found.
+ *
+ * str: string to scan.
+ * find_char: character to find.
+ */
+const char *
+findUnquotedChar(const char *str, char find_char)
+{
+ const char *p = str;
+ bool inquotes = false;
+
+ while (*p++) {
+ if (*p == '"')
+ inquotes = !inquotes;
+ else if ((*p == find_char) && !inquotes)
+ return p;
+ }
+ return NULL;
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 8a95cbb3a8..8616e412d2 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
{
struct SimpleOidListCell *next;
Oid val;
+ void *extra_data;
} SimpleOidListCell;
typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
extern void simple_oid_list_append(SimpleOidList *list, Oid val);
extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
+extern void simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data);
+
extern void simple_string_list_append(SimpleStringList *list, const char *val);
extern bool simple_string_list_member(SimpleStringList *list, const char *val);
diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
index 31c8d7d7a3..8e07bdb571 100644
--- a/src/include/fe_utils/string_utils.h
+++ b/src/include/fe_utils/string_utils.h
@@ -56,4 +56,6 @@ extern bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf,
const char *schemavar, const char *namevar,
const char *altnamevar, const char *visibilityrule);
+extern const char *findUnquotedChar(const char *str, char find_char);
+
#endif /* STRING_UTILS_H */
--
2.20.1
On Thu, Jan 31, 2019 at 10:22 PM Carter Thaxton <carter.thaxton@gmail.com>
wrote:
Here's a rebased patch that works with the latest master branch. Very
straightforward.
You forget to resubmit it to the next commitfest and the error message on
incorrect
where clause specification is still the same
regards
Surafel