pg_dump --exclude-table-data

Started by Andrew Dunstanover 14 years ago15 messages
#1Andrew Dunstan
andrew@dunslane.net
2 attachment(s)

Attached is an undocumented patch that allows a user to have pg_dump
exclude data but not DDL for a table. One use case for this is a very
large table that changes infrequently, and for which dumping data
frequently would be wasteful and unnecessary. This is especially useful
in conjunction with another patch (see next email) to do post-data items
only or omit post-data items in pg_restore.

For those who are (like my clients :-) ) anxious to get their hands on
this immediately, a backport patch is also attached which applies to 9.0
sources, and applies with offsets to 8.4 sources.

cheers

andrew

Attachments:

exclude-table-data.patch.90text/plain; name=exclude-table-data.patch.90Download
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8721e65..aad6b00 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -104,6 +104,8 @@ static SimpleStringList table_include_patterns = {NULL, NULL};
 static SimpleOidList table_include_oids = {NULL, NULL};
 static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
+static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
 
 /* default, if no "inclusion" switches appear, is to dump everything */
 static bool include_everything = true;
@@ -261,6 +263,7 @@ main(int argc, char **argv)
 		{"blobs", no_argument, NULL, 'b'},
 		{"clean", no_argument, NULL, 'c'},
 		{"create", no_argument, NULL, 'C'},
+		{"exclude-table-data", required_argument, NULL, 'D'},
 		{"file", required_argument, NULL, 'f'},
 		{"format", required_argument, NULL, 'F'},
 		{"host", required_argument, NULL, 'h'},
@@ -334,7 +337,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "abcCE:f:F:h:in:N:oOp:RsS:t:T:U:vwWxX:Z:",
+	while ((c = getopt_long(argc, argv, "abcCD:E:f:F:h:in:N:oOp:RsS:t:T:U:vwWxX:Z:",
 							long_options, &optindex)) != -1)
 	{
 		switch (c)
@@ -355,6 +358,10 @@ main(int argc, char **argv)
 				outputCreateDB = 1;
 				break;
 
+			case 'D':			/* exclude table(s) data */
+				simple_string_list_append(&tabledata_exclude_patterns, optarg);
+				break;
+
 			case 'E':			/* Dump encoding */
 				dumpencoding = optarg;
 				break;
@@ -689,6 +696,10 @@ main(int argc, char **argv)
 	}
 	expand_table_name_patterns(&table_exclude_patterns,
 							   &table_exclude_oids);
+
+	expand_table_name_patterns(&tabledata_exclude_patterns,
+							   &tabledata_exclude_oids);
+
 	/* non-matching exclusion patterns aren't an error */
 
 	/*
@@ -813,6 +824,8 @@ help(const char *progname)
 	printf(_("  -b, --blobs                 include large objects in dump\n"));
 	printf(_("  -c, --clean                 clean (drop) database objects before recreating\n"));
 	printf(_("  -C, --create                include commands to create database in dump\n"));
+	printf(_("  -D, --exclude-table-data=TABLE\n"
+			 "                              do NOT dump data for the named table(s)\n"));
 	printf(_("  -E, --encoding=ENCODING     dump the data in encoding ENCODING\n"));
 	printf(_("  -n, --schema=SCHEMA         dump the named schema(s) only\n"));
 	printf(_("  -N, --exclude-schema=SCHEMA do NOT dump the named schema(s)\n"));
@@ -1012,6 +1025,15 @@ selectDumpableTable(TableInfo *tbinfo)
 		simple_oid_list_member(&table_exclude_oids,
 							   tbinfo->dobj.catId.oid))
 		tbinfo->dobj.dump = false;
+
+	/* If table is to be dumped, check that the data is not excluded */
+	if (tbinfo->dobj.dump && !
+		simple_oid_list_member(&tabledata_exclude_oids,
+							   tbinfo->dobj.catId.oid))
+		tbinfo->dobj.dumpdata = true;
+	else
+		tbinfo->dobj.dumpdata = false;
+		
 }
 
 /*
@@ -1391,6 +1413,10 @@ dumpTableData(Archive *fout, TableDataInfo *tdinfo)
 	DataDumperPtr dumpFn;
 	char	   *copyStmt;
 
+	/* don't do anything if the data isn't wanted */
+	if (!tbinfo->dobj.dumpdata)
+		return;
+
 	if (!dump_inserts)
 	{
 		/* Dump/restore using COPY */
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 1dc7157..b4f5716 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -127,6 +127,7 @@ typedef struct _dumpableObject
 	char	   *name;			/* object name (should never be NULL) */
 	struct _namespaceInfo *namespace;	/* containing namespace, or NULL */
 	bool		dump;			/* true if we want to dump this object */
+	bool        dumpdata;       /* true if we want data for this object */
 	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
 	int			nDeps;			/* number of valid dependencies */
 	int			allocDeps;		/* allocated size of dependencies[] */
exclude-table-data.patchtext/x-patch; name=exclude-table-data.patchDownload
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index cf0fc4b..f6cd7eb 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -114,6 +114,8 @@ static SimpleStringList table_include_patterns = {NULL, NULL};
 static SimpleOidList table_include_oids = {NULL, NULL};
 static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
+static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
 
 /* default, if no "inclusion" switches appear, is to dump everything */
 static bool include_everything = true;
@@ -289,6 +291,7 @@ main(int argc, char **argv)
 		{"blobs", no_argument, NULL, 'b'},
 		{"clean", no_argument, NULL, 'c'},
 		{"create", no_argument, NULL, 'C'},
+		{"exclude-table-data", required_argument, NULL, 'D'},
 		{"file", required_argument, NULL, 'f'},
 		{"format", required_argument, NULL, 'F'},
 		{"host", required_argument, NULL, 'h'},
@@ -366,7 +369,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "abcCE:f:F:h:in:N:oOp:RsS:t:T:U:vwWxZ:",
+	while ((c = getopt_long(argc, argv, "abcCD:E:f:F:h:in:N:oOp:RsS:t:T:U:vwWxZ:",
 							long_options, &optindex)) != -1)
 	{
 		switch (c)
@@ -387,6 +390,10 @@ main(int argc, char **argv)
 				outputCreateDB = 1;
 				break;
 
+			case 'D':			/* exclude table(s) data */
+				simple_string_list_append(&tabledata_exclude_patterns, optarg);
+				break;
+
 			case 'E':			/* Dump encoding */
 				dumpencoding = optarg;
 				break;
@@ -718,6 +725,10 @@ main(int argc, char **argv)
 	}
 	expand_table_name_patterns(&table_exclude_patterns,
 							   &table_exclude_oids);
+
+	expand_table_name_patterns(&tabledata_exclude_patterns,
+							   &tabledata_exclude_oids);
+
 	/* non-matching exclusion patterns aren't an error */
 
 	/*
@@ -842,6 +853,8 @@ help(const char *progname)
 	printf(_("  -b, --blobs                 include large objects in dump\n"));
 	printf(_("  -c, --clean                 clean (drop) database objects before recreating\n"));
 	printf(_("  -C, --create                include commands to create database in dump\n"));
+	printf(_("  -D, --exclude-table-data=TABLE\n"
+			 "                              do NOT dump data for the named table(s)\n"));
 	printf(_("  -E, --encoding=ENCODING     dump the data in encoding ENCODING\n"));
 	printf(_("  -n, --schema=SCHEMA         dump the named schema(s) only\n"));
 	printf(_("  -N, --exclude-schema=SCHEMA do NOT dump the named schema(s)\n"));
@@ -1090,6 +1103,15 @@ selectDumpableTable(TableInfo *tbinfo)
 		simple_oid_list_member(&table_exclude_oids,
 							   tbinfo->dobj.catId.oid))
 		tbinfo->dobj.dump = false;
+
+	/* If table is to be dumped, check that the data is not excluded */
+	if (tbinfo->dobj.dump && !
+		simple_oid_list_member(&tabledata_exclude_oids,
+							   tbinfo->dobj.catId.oid))
+		tbinfo->dobj.dumpdata = true;
+	else
+		tbinfo->dobj.dumpdata = false;
+		
 }
 
 /*
@@ -1507,6 +1529,10 @@ dumpTableData(Archive *fout, TableDataInfo *tdinfo)
 	DataDumperPtr dumpFn;
 	char	   *copyStmt;
 
+	/* don't do anything if the data isn't wanted */
+	if (!tbinfo->dobj.dumpdata)
+		return;
+
 	if (!dump_inserts)
 	{
 		/* Dump/restore using COPY */
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 3d5d534..d6b5dd4 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -129,6 +129,7 @@ typedef struct _dumpableObject
 	char	   *name;			/* object name (should never be NULL) */
 	struct _namespaceInfo *namespace;	/* containing namespace, or NULL */
 	bool		dump;			/* true if we want to dump this object */
+	bool        dumpdata;       /* true if we want data for this object */
 	bool		ext_member;		/* true if object is member of extension */
 	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
 	int			nDeps;			/* number of valid dependencies */
#2Josh Berkus
josh@agliodbs.com
In reply to: Andrew Dunstan (#1)
Re: pg_dump --exclude-table-data

For those who are (like my clients :-) ) anxious to get their hands on
this immediately, a backport patch is also attached which applies to 9.0
sources, and applies with offsets to 8.4 sources.

FWIW, I have immediate use for this in creating cut-down versions of
databases for testing purposes. It'll eliminate a couple pages of shell
scripts for me.

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com

#3bricklen
bricklen@gmail.com
In reply to: Josh Berkus (#2)
Re: pg_dump --exclude-table-data

On Wed, Aug 24, 2011 at 2:01 PM, Josh Berkus <josh@agliodbs.com> wrote:

FWIW, I have immediate use for this in creating cut-down versions of
databases for testing purposes.  It'll eliminate a couple pages of shell
scripts for me.

Speaking of "cut-down versions", I have recently been using pg_sample,
and been happy with the resulting subset database. I created a db <10
GB in size from a source db ~600Gb in a few minutes.

https://github.com/mla/pg_sample

#4Andrew Dunstan
andrew@dunslane.net
In reply to: Josh Berkus (#2)
Re: pg_dump --exclude-table-data

On 08/24/2011 05:01 PM, Josh Berkus wrote:

For those who are (like my clients :-) ) anxious to get their hands on
this immediately, a backport patch is also attached which applies to 9.0
sources, and applies with offsets to 8.4 sources.

FWIW, I have immediate use for this in creating cut-down versions of
databases for testing purposes. It'll eliminate a couple pages of shell
scripts for me.

OK, this seems to have some pluses and no negative comments, so it seems
worth going forward. Do we want an equivalent pg_restore option?

cheers

andrew

#5Josh Berkus
josh@agliodbs.com
In reply to: Andrew Dunstan (#4)
Re: pg_dump --exclude-table-data

OK, this seems to have some pluses and no negative comments, so it seems
worth going forward. Do we want an equivalent pg_restore option?

I'm not sure it's *as* important for pg_restore, since I can easily use
a manifest to avoid restoring data for a single table. So I guess it's
a question of "how hard is it to add it?"

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com

#6Andrew Dunstan
andrew@dunslane.net
In reply to: Josh Berkus (#5)
1 attachment(s)
Re: pg_dump --exclude-table-data

On 09/02/2011 03:15 PM, Josh Berkus wrote:

OK, this seems to have some pluses and no negative comments, so it seems
worth going forward. Do we want an equivalent pg_restore option?

I'm not sure it's *as* important for pg_restore, since I can easily use
a manifest to avoid restoring data for a single table. So I guess it's
a question of "how hard is it to add it?"

The short answer is "more work than I want to put in to this."
pg_restore doesn't have any of pg_dump's infrastructure for handling
table name patterns, nor for excluding tables. So I think all that would
remain a TODO. (A good beginner project, maybe).

A slightly updated patch is attached, the main change being that I
removed use of a short option and only support the long name option.
"-D" didn't seem sufficiently mnemonic to me. I'll add this to the
November commitfest, but I'd like to get it committed ASAP as it will
simplify setting up the -pre and -post data patches.

cheers

andrew

Attachments:

exclude-table-data2.patchtext/x-patch; name=exclude-table-data2.patchDownload
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index f6f33de..a5443b7 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -612,6 +612,21 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>--exclude-table-data=<replaceable class="parameter">table</replaceable></option></term>
+      <listitem>
+       <para>
+        Do not dump data for any tables matching the <replaceable
+        class="parameter">table</replaceable> pattern.  The pattern is
+        interpreted according to the same rules as for <option>-t</>.
+        <option>--exclude-table-data</> can be given more than once to 
+        exclude tables matching any of several patterns. This option is
+        useful when you need the definition of a particular table even
+        though you do not need the data in it.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--inserts</option></term>
       <listitem>
        <para>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index c17b52c..6048e2c 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -114,6 +114,8 @@ static SimpleStringList table_include_patterns = {NULL, NULL};
 static SimpleOidList table_include_oids = {NULL, NULL};
 static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
+static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
 
 /* default, if no "inclusion" switches appear, is to dump everything */
 static bool include_everything = true;
@@ -322,6 +324,7 @@ main(int argc, char **argv)
 		{"column-inserts", no_argument, &column_inserts, 1},
 		{"disable-dollar-quoting", no_argument, &disable_dollar_quoting, 1},
 		{"disable-triggers", no_argument, &disable_triggers, 1},
+		{"exclude-table-data", required_argument, NULL, 4},
 		{"inserts", no_argument, &dump_inserts, 1},
 		{"lock-wait-timeout", required_argument, NULL, 2},
 		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
@@ -485,6 +488,10 @@ main(int argc, char **argv)
 				use_role = optarg;
 				break;
 
+			case 4:			/* exclude table(s) data */
+				simple_string_list_append(&tabledata_exclude_patterns, optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -712,6 +719,10 @@ main(int argc, char **argv)
 	}
 	expand_table_name_patterns(&table_exclude_patterns,
 							   &table_exclude_oids);
+
+	expand_table_name_patterns(&tabledata_exclude_patterns,
+							   &tabledata_exclude_oids);
+
 	/* non-matching exclusion patterns aren't an error */
 
 	/*
@@ -851,6 +862,7 @@ help(const char *progname)
 	printf(_("  --column-inserts            dump data as INSERT commands with column names\n"));
 	printf(_("  --disable-dollar-quoting    disable dollar quoting, use SQL standard quoting\n"));
 	printf(_("  --disable-triggers          disable triggers during data-only restore\n"));
+	printf(_("  --exclude-table-data=TABLE  do NOT dump data for the named table(s)\n"));
 	printf(_("  --inserts                   dump data as INSERT commands, rather than COPY\n"));
 	printf(_("  --no-security-labels        do not dump security label assignments\n"));
 	printf(_("  --no-tablespaces            do not dump tablespace assignments\n"));
@@ -1084,6 +1096,15 @@ selectDumpableTable(TableInfo *tbinfo)
 		simple_oid_list_member(&table_exclude_oids,
 							   tbinfo->dobj.catId.oid))
 		tbinfo->dobj.dump = false;
+
+	/* If table is to be dumped, check that the data is not excluded */
+	if (tbinfo->dobj.dump && !
+		simple_oid_list_member(&tabledata_exclude_oids,
+							   tbinfo->dobj.catId.oid))
+		tbinfo->dobj.dumpdata = true;
+	else
+		tbinfo->dobj.dumpdata = false;
+		
 }
 
 /*
@@ -1515,6 +1536,10 @@ dumpTableData(Archive *fout, TableDataInfo *tdinfo)
 	DataDumperPtr dumpFn;
 	char	   *copyStmt;
 
+	/* don't do anything if the data isn't wanted */
+	if (!tbinfo->dobj.dumpdata)
+		return;
+
 	if (!dump_inserts)
 	{
 		/* Dump/restore using COPY */
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 3d5d534..d6b5dd4 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -129,6 +129,7 @@ typedef struct _dumpableObject
 	char	   *name;			/* object name (should never be NULL) */
 	struct _namespaceInfo *namespace;	/* containing namespace, or NULL */
 	bool		dump;			/* true if we want to dump this object */
+	bool        dumpdata;       /* true if we want data for this object */
 	bool		ext_member;		/* true if object is member of extension */
 	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
 	int			nDeps;			/* number of valid dependencies */
#7Robert Haas
robertmhaas@gmail.com
In reply to: Andrew Dunstan (#6)
Re: pg_dump --exclude-table-data

On Wed, Nov 2, 2011 at 6:02 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

On 09/02/2011 03:15 PM, Josh Berkus wrote:

OK, this seems to have some pluses and no negative comments, so it seems
worth going forward. Do we want an equivalent pg_restore option?

I'm not sure it's *as* important for pg_restore, since I can easily use
 a manifest to avoid restoring data for a single table.  So I guess it's
a question of "how hard is it to add it?"

The short answer is "more work than I want to put in to this." pg_restore
doesn't have any of pg_dump's infrastructure for handling table name
patterns, nor for excluding tables. So I think all that would remain a TODO.
(A good beginner project, maybe).

A slightly updated patch is attached, the main change being that I removed
use of a short option and only support the long name option. "-D" didn't
seem sufficiently mnemonic to me. I'll add this to the November commitfest,
but I'd like to get it committed ASAP as it will simplify setting up the
-pre and -post data patches.

Instead of:

do NOT dump data for the named table(s)

How about:

dump only schema for the named table(s)

I'm also a bit concerned about the relationship between this and the
existing -s option. It seems odd that you use --schema-only to get
the behavior database-wide, and --exclude-table-data to get it for
just one table. Is there some way we can make that a bit more
consistent?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#8Andrew Dunstan
andrew@dunslane.net
In reply to: Robert Haas (#7)
Re: pg_dump --exclude-table-data

On 11/04/2011 10:21 AM, Robert Haas wrote:

A slightly updated patch is attached, the main change being that I removed
use of a short option and only support the long name option. "-D" didn't
seem sufficiently mnemonic to me. I'll add this to the November commitfest,
but I'd like to get it committed ASAP as it will simplify setting up the
-pre and -post data patches.

Instead of:

do NOT dump data for the named table(s)

How about:

dump only schema for the named table(s)

I have no great objection to the wording change.

I'm also a bit concerned about the relationship between this and the
existing -s option. It seems odd that you use --schema-only to get
the behavior database-wide, and --exclude-table-data to get it for
just one table. Is there some way we can make that a bit more
consistent?

It's consistent, and was designed to be, with the --exclude-table
option. I'm not sure what you want it to look like instead. But TBH I'm
more interested in getting the functionality than in how it's spelled.

cheers

andrew

#9Robert Haas
robertmhaas@gmail.com
In reply to: Andrew Dunstan (#8)
Re: pg_dump --exclude-table-data

On Wed, Dec 7, 2011 at 10:19 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

I'm also a bit concerned about the relationship between this and the
existing -s option.  It seems odd that you use --schema-only to get
the behavior database-wide, and --exclude-table-data to get it for
just one table.  Is there some way we can make that a bit more
consistent?

It's consistent, and was designed to be, with the --exclude-table option.
I'm not sure what you want it to look like instead. But TBH I'm more
interested in getting the functionality than in how it's spelled.

Ah, hmm. Well, maybe it's fine the way that you have it. But I'd be
tempted to at least add a sentence to the sgml documentation for each
option referring to the other, e.g. "To dump only schema for all
tables in the database, see --schema-only." and "To exclude table data
for only a subset of tables in the database, see
--exclude-table-data.", or something along those lines.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#10Andrew Dunstan
andrew@dunslane.net
In reply to: Robert Haas (#9)
Re: pg_dump --exclude-table-data

On 12/08/2011 11:13 AM, Robert Haas wrote:

On Wed, Dec 7, 2011 at 10:19 PM, Andrew Dunstan<andrew@dunslane.net> wrote:

I'm also a bit concerned about the relationship between this and the
existing -s option. It seems odd that you use --schema-only to get
the behavior database-wide, and --exclude-table-data to get it for
just one table. Is there some way we can make that a bit more
consistent?

It's consistent, and was designed to be, with the --exclude-table option.
I'm not sure what you want it to look like instead. But TBH I'm more
interested in getting the functionality than in how it's spelled.

Ah, hmm. Well, maybe it's fine the way that you have it. But I'd be
tempted to at least add a sentence to the sgml documentation for each
option referring to the other, e.g. "To dump only schema for all
tables in the database, see --schema-only." and "To exclude table data
for only a subset of tables in the database, see
--exclude-table-data.", or something along those lines.

Sure, no problem with that.

cheers

andrew

#11Andrew Dunstan
andrew@dunslane.net
In reply to: Andrew Dunstan (#10)
1 attachment(s)
Re: pg_dump --exclude-table-data

On 12/08/2011 11:36 AM, Andrew Dunstan wrote:

On 12/08/2011 11:13 AM, Robert Haas wrote:

Ah, hmm. Well, maybe it's fine the way that you have it. But I'd be
tempted to at least add a sentence to the sgml documentation for each
option referring to the other, e.g. "To dump only schema for all
tables in the database, see --schema-only." and "To exclude table data
for only a subset of tables in the database, see
--exclude-table-data.", or something along those lines.

Sure, no problem with that.

Revised patch attached.

cheers

andrew

Attachments:

exclude-table-data3.patchtext/x-patch; name=exclude-table-data3.patchDownload
*** a/doc/src/sgml/ref/pg_dump.sgml
--- b/doc/src/sgml/ref/pg_dump.sgml
***************
*** 404,409 **** PostgreSQL documentation
--- 404,413 ----
         <para>
          Dump only the object definitions (schema), not data.
         </para>
+        <para>
+         To exclude table data for only a subset of tables in the database, 
+         see <option>--exclude-table-data</>.
+        </para>
        </listitem>
       </varlistentry>
  
***************
*** 612,617 **** PostgreSQL documentation
--- 616,639 ----
       </varlistentry>
  
       <varlistentry>
+       <term><option>--exclude-table-data=<replaceable class="parameter">table</replaceable></option></term>
+       <listitem>
+        <para>
+         Do not dump data for any tables matching the <replaceable
+         class="parameter">table</replaceable> pattern.  The pattern is
+         interpreted according to the same rules as for <option>-t</>.
+         <option>--exclude-table-data</> can be given more than once to 
+         exclude tables matching any of several patterns. This option is
+         useful when you need the definition of a particular table even
+         though you do not need the data in it.
+        </para>
+        <para>
+         To exclude data for all tables in the database, see <option>--schema-only</>.
+        <para>
+       </listitem>
+      </varlistentry>
+ 
+      <varlistentry>
        <term><option>--inserts</option></term>
        <listitem>
         <para>
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
***************
*** 115,120 **** static SimpleStringList table_include_patterns = {NULL, NULL};
--- 115,122 ----
  static SimpleOidList table_include_oids = {NULL, NULL};
  static SimpleStringList table_exclude_patterns = {NULL, NULL};
  static SimpleOidList table_exclude_oids = {NULL, NULL};
+ static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
+ static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
  
  /* default, if no "inclusion" switches appear, is to dump everything */
  static bool include_everything = true;
***************
*** 324,329 **** main(int argc, char **argv)
--- 326,332 ----
  		{"column-inserts", no_argument, &column_inserts, 1},
  		{"disable-dollar-quoting", no_argument, &disable_dollar_quoting, 1},
  		{"disable-triggers", no_argument, &disable_triggers, 1},
+ 		{"exclude-table-data", required_argument, NULL, 4},
  		{"inserts", no_argument, &dump_inserts, 1},
  		{"lock-wait-timeout", required_argument, NULL, 2},
  		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
***************
*** 487,492 **** main(int argc, char **argv)
--- 490,499 ----
  				use_role = optarg;
  				break;
  
+ 			case 4:			/* exclude table(s) data */
+ 				simple_string_list_append(&tabledata_exclude_patterns, optarg);
+ 				break;
+ 
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 715,720 **** main(int argc, char **argv)
--- 722,731 ----
  	}
  	expand_table_name_patterns(&table_exclude_patterns,
  							   &table_exclude_oids);
+ 
+ 	expand_table_name_patterns(&tabledata_exclude_patterns,
+ 							   &tabledata_exclude_oids);
+ 
  	/* non-matching exclusion patterns aren't an error */
  
  	/*
***************
*** 854,859 **** help(const char *progname)
--- 865,871 ----
  	printf(_("  --column-inserts            dump data as INSERT commands with column names\n"));
  	printf(_("  --disable-dollar-quoting    disable dollar quoting, use SQL standard quoting\n"));
  	printf(_("  --disable-triggers          disable triggers during data-only restore\n"));
+ 	printf(_("  --exclude-table-data=TABLE  do NOT dump data for the named table(s)\n"));
  	printf(_("  --inserts                   dump data as INSERT commands, rather than COPY\n"));
  	printf(_("  --no-security-labels        do not dump security label assignments\n"));
  	printf(_("  --no-tablespaces            do not dump tablespace assignments\n"));
***************
*** 1087,1092 **** selectDumpableTable(TableInfo *tbinfo)
--- 1099,1113 ----
  		simple_oid_list_member(&table_exclude_oids,
  							   tbinfo->dobj.catId.oid))
  		tbinfo->dobj.dump = false;
+ 
+ 	/* If table is to be dumped, check that the data is not excluded */
+ 	if (tbinfo->dobj.dump && !
+ 		simple_oid_list_member(&tabledata_exclude_oids,
+ 							   tbinfo->dobj.catId.oid))
+ 		tbinfo->dobj.dumpdata = true;
+ 	else
+ 		tbinfo->dobj.dumpdata = false;
+ 		
  }
  
  /*
***************
*** 1518,1523 **** dumpTableData(Archive *fout, TableDataInfo *tdinfo)
--- 1539,1548 ----
  	DataDumperPtr dumpFn;
  	char	   *copyStmt;
  
+ 	/* don't do anything if the data isn't wanted */
+ 	if (!tbinfo->dobj.dumpdata)
+ 		return;
+ 
  	if (!dump_inserts)
  	{
  		/* Dump/restore using COPY */
*** a/src/bin/pg_dump/pg_dump.h
--- b/src/bin/pg_dump/pg_dump.h
***************
*** 129,134 **** typedef struct _dumpableObject
--- 129,135 ----
  	char	   *name;			/* object name (should never be NULL) */
  	struct _namespaceInfo *namespace;	/* containing namespace, or NULL */
  	bool		dump;			/* true if we want to dump this object */
+ 	bool        dumpdata;       /* true if we want data for this object */
  	bool		ext_member;		/* true if object is member of extension */
  	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
  	int			nDeps;			/* number of valid dependencies */
#12Peter Geoghegan
peter@2ndquadrant.com
In reply to: Andrew Dunstan (#11)
Re: pg_dump --exclude-table-data

Not sure that I have a lot to add here, but I am officially listed as
a reviewer, which is a responsibility that I don't want to shirk.

In my opinion, this patch is obviously useful. I don't find the
asymmetry that it will create with pg_restore to be troubling, so I'd
favour committing it as-is.

Extremely minor problem noted: There are two spaces at the start of
one sentence in your SGML doc updates.

--
Peter Geoghegan       http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training and Services

#13Andrew Dunstan
andrew@dunslane.net
In reply to: Peter Geoghegan (#12)
Re: pg_dump --exclude-table-data

On 12/14/2011 06:28 AM, Peter Geoghegan wrote:

Not sure that I have a lot to add here, but I am officially listed as
a reviewer, which is a responsibility that I don't want to shirk.

In my opinion, this patch is obviously useful. I don't find the
asymmetry that it will create with pg_restore to be troubling, so I'd
favour committing it as-is.

Extremely minor problem noted: There are two spaces at the start of
one sentence in your SGML doc updates.

Thanks. Committed with that changed, although we seem to be getting
altogether too obsessive about white space, IMNSHO.

cheers

andrew

#14Peter Geoghegan
peter@2ndquadrant.com
In reply to: Andrew Dunstan (#13)
Re: pg_dump --exclude-table-data

On 14 December 2011 14:31, Andrew Dunstan <andrew@dunslane.net> wrote:

Thanks. Committed with that changed, although we seem to be getting
altogether too obsessive about white space, IMNSHO.

I agree, but I think it's important that we judge patches by a
consistent standard. Right now, for better or worse, that standard
includes being obsessed with white space.

--
Peter Geoghegan       http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training and Services

#15David E. Wheeler
david@kineticode.com
In reply to: Andrew Dunstan (#13)
Re: pg_dump --exclude-table-data

On Dec 14, 2011, at 6:31 AM, Andrew Dunstan wrote:

Thanks. Committed with that changed, although we seem to be getting altogether too obsessive about white space, IMNSHO.

If that’s all there is to complain about, I think it’s a pretty good sign. ;-P

David