pg_restore --no-post-data and --post-data-only

Started by Andrew Dunstanover 14 years ago38 messages
#1Andrew Dunstan
andrew@dunslane.net
2 attachment(s)

Attached is an undocumented patch that allows pg_restore to omit
post-data items or omit all but post-data items. This has been discussed
before, and Simon sent in a patch back on 2008, which has bitrotted
some. I'm not sure why it was dropped at the time, but I think it's time
to do this. This patch relies on some infrastructure that was added
since Simon's patch, so it works a bit differently (and more simply).

So with this patch, the following three sequences should be equivalent:

pg_restore --no-post-data
pg_restore --post-data-only

pg_restore -s --no-post-data
pg_restore -a
pg_restore --post-data-only

pg_restore

This is useful and worth doing on its own, and will also add to the
usefulness of the pg_dump --exclude-table-data patch in my previous email.

As with that patch, a version that applies to version 9.0 and 8.4
sources is also attached, for the very eager.

cheers

andrew

Attachments:

exclude-post-data-steps.patch.90text/plain; name=exclude-post-data-steps.patch.90Download
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 5a73779..8bd45c1 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -106,6 +106,8 @@ typedef struct _restoreOptions
 	char	   *superuser;		/* Username to use as superuser */
 	char	   *use_role;		/* Issue SET ROLE to this */
 	int			dataOnly;
+	int         postDataOnly;   /* skip all but post-data section */
+	int         noPostData;     /* skip post-data section */
 	int			dropSchema;
 	char	   *filename;
 	int			schemaOnly;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 26ee9d9..d113435 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2086,6 +2086,7 @@ ReadToc(ArchiveHandle *AH)
 	int			depIdx;
 	int			depSize;
 	TocEntry   *te;
+	bool        in_post_data = false;
 
 	AH->tocCount = ReadInt(AH);
 	AH->maxDumpId = 0;
@@ -2151,6 +2152,12 @@ ReadToc(ArchiveHandle *AH)
 				te->section = SECTION_PRE_DATA;
 		}
 
+		/* will stay true even for SECTION_NONE items */
+		if (te->section == SECTION_POST_DATA)
+			in_post_data = true;
+
+		te->inPostData = in_post_data;
+
 		te->defn = ReadStr(AH);
 		te->dropStmt = ReadStr(AH);
 
@@ -2306,6 +2313,12 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
 			return 0;
 	}
 
+	/* skip (all but) post data section as required */
+	if (ropt->noPostData && te->inPostData)
+		return 0;
+	if (ropt->postDataOnly && ! te->inPostData)
+		return 0;
+
 	if (ropt->selTypes)
 	{
 		if (strcmp(te->desc, "TABLE") == 0 ||
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index a3a87dc..8557481 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -289,6 +289,9 @@ typedef struct _tocEntry
 	void	   *dataDumperArg;	/* Arg for above routine */
 	void	   *formatData;		/* TOC Entry data specific to file format */
 
+	/* in post data? not quite the same as section, might be SECTION_NONE */
+	bool        inPostData;     
+
 	/* working state (needed only for parallel restore) */
 	struct _tocEntry *par_prev; /* list links for pending/ready items; */
 	struct _tocEntry *par_next; /* these are NULL if not in either list */
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index dbdf7ac..e205d6e 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -76,6 +76,8 @@ main(int argc, char **argv)
 	static int	no_data_for_failed_tables = 0;
 	static int	outputNoTablespaces = 0;
 	static int	use_setsessauth = 0;
+	static int  post_data_only = 0;
+	static int  no_post_data = 0;
 
 	struct option cmdopts[] = {
 		{"clean", 0, NULL, 'c'},
@@ -116,7 +118,9 @@ main(int argc, char **argv)
 		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
 		{"role", required_argument, NULL, 2},
 		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
-
+		{"post-data-only", no_argument, &post_data_only, 1},
+		{"no-post-data", no_argument, &no_post_data, 1},
+ 
 		{NULL, 0, NULL, 0}
 	};
 
@@ -337,6 +341,8 @@ main(int argc, char **argv)
 	opts->noDataForFailedTables = no_data_for_failed_tables;
 	opts->noTablespace = outputNoTablespaces;
 	opts->use_setsessauth = use_setsessauth;
+	opts->postDataOnly = post_data_only;
+	opts->noPostData = no_post_data;
 
 	if (opts->formatName)
 	{
@@ -443,6 +449,9 @@ usage(const char *progname)
 			 "                           created\n"));
 	printf(_("  --no-tablespaces         do not restore tablespace assignments\n"));
 	printf(_("  --role=ROLENAME          do SET ROLE before restore\n"));
+	printf(_("  --no-post-data           do not restore constraints, indexes, rules, triggers\n"));
+	printf(_("  --post-data-only         only restore constraints, indexes, rules, triggers\n"));
+	
 	printf(_("  --use-set-session-authorization\n"
 			 "                           use SET SESSION AUTHORIZATION commands instead of\n"
 	  "                           ALTER OWNER commands to set ownership\n"));
exclude-post-data-steps.patchtext/x-patch; name=exclude-post-data-steps.patchDownload
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index ce12a41..d4e2890 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -108,6 +108,8 @@ typedef struct _restoreOptions
 	char	   *superuser;		/* Username to use as superuser */
 	char	   *use_role;		/* Issue SET ROLE to this */
 	int			dataOnly;
+	int         postDataOnly;   /* skip all but post-data section */
+	int         noPostData;     /* skip post-data section */
 	int			dropSchema;
 	char	   *filename;
 	int			schemaOnly;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index c57799e..8b90ad0 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2141,6 +2141,7 @@ ReadToc(ArchiveHandle *AH)
 	int			depIdx;
 	int			depSize;
 	TocEntry   *te;
+	bool        in_post_data = false;
 
 	AH->tocCount = ReadInt(AH);
 	AH->maxDumpId = 0;
@@ -2206,6 +2207,12 @@ ReadToc(ArchiveHandle *AH)
 				te->section = SECTION_PRE_DATA;
 		}
 
+		/* will stay true even for SECTION_NONE items */
+		if (te->section == SECTION_POST_DATA)
+			in_post_data = true;
+
+		te->inPostData = in_post_data;
+
 		te->defn = ReadStr(AH);
 		te->dropStmt = ReadStr(AH);
 
@@ -2365,6 +2372,12 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
 			return 0;
 	}
 
+	/* skip (all but) post data section as required */
+	if (ropt->noPostData && te->inPostData)
+		return 0;
+	if (ropt->postDataOnly && ! te->inPostData)
+		return 0;
+
 	if (ropt->selTypes)
 	{
 		if (strcmp(te->desc, "TABLE") == 0 ||
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 8a3a6f9..7712a42 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -287,6 +287,9 @@ typedef struct _tocEntry
 	void	   *dataDumperArg;	/* Arg for above routine */
 	void	   *formatData;		/* TOC Entry data specific to file format */
 
+	/* in post data? not quite the same as section, might be SECTION_NONE */
+	bool        inPostData;     
+
 	/* working state (needed only for parallel restore) */
 	struct _tocEntry *par_prev; /* list links for pending/ready items; */
 	struct _tocEntry *par_next; /* these are NULL if not in either list */
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index 7731d25..74145fb 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -77,6 +77,8 @@ main(int argc, char **argv)
 	static int	outputNoTablespaces = 0;
 	static int	use_setsessauth = 0;
 	static int	no_security_labels = 0;
+	static int  post_data_only = 0;
+	static int  no_post_data = 0;
 
 	struct option cmdopts[] = {
 		{"clean", 0, NULL, 'c'},
@@ -118,6 +120,8 @@ main(int argc, char **argv)
 		{"role", required_argument, NULL, 2},
 		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
 		{"no-security-labels", no_argument, &no_security_labels, 1},
+		{"post-data-only", no_argument, &post_data_only, 1},
+		{"no-post-data", no_argument, &no_post_data, 1},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -319,6 +323,8 @@ main(int argc, char **argv)
 	opts->noTablespace = outputNoTablespaces;
 	opts->use_setsessauth = use_setsessauth;
 	opts->no_security_labels = no_security_labels;
+	opts->postDataOnly = post_data_only;
+	opts->noPostData = no_post_data;
 
 	if (opts->formatName)
 	{
@@ -431,7 +437,10 @@ usage(const char *progname)
 			 "                           do not restore data of tables that could not be\n"
 			 "                           created\n"));
 	printf(_("  --no-security-labels     do not restore security labels\n"));
-	printf(_("  --no-tablespaces         do not restore tablespace assignments\n"));
+	printf(_("  --no-tablespaces         do not restore tablespace assignments\n"));	
+	printf(_("  --no-post-data           do not restore constraints, indexes, rules, triggers\n"));
+	printf(_("  --post-data-only         only restore constraints, indexes, rules, triggers\n"));
+	
 	printf(_("  --use-set-session-authorization\n"
 			 "                           use SET SESSION AUTHORIZATION commands instead of\n"
 	  "                           ALTER OWNER commands to set ownership\n"));
#2Josh Berkus
josh@agliodbs.com
In reply to: Andrew Dunstan (#1)
Re: pg_restore --no-post-data and --post-data-only

On 8/23/11 1:30 PM, Andrew Dunstan wrote:

Attached is an undocumented patch that allows pg_restore to omit
post-data items or omit all but post-data items. This has been discussed
before, and Simon sent in a patch back on 2008, which has bitrotted
some. I'm not sure why it was dropped at the time, but I think it's time
to do this. This patch relies on some infrastructure that was added
since Simon's patch, so it works a bit differently (and more simply).

If it's not clear from Andrew's description, the purpose of this patch
is to allow dividing your pgdump into 3 portions:

1. schema
2. data
3. constraints/indexes

This allows users to implement a number of custom solutions for ad-hoc
parallel dump, conditional loading, data munging and sampled databases.
While doing so was possible before using the manifest from pg_restore
-l, the manifest approach has been complex to automate and relies on
obscure knowledge.

I have immediate production use for this patch and may be backporting it.

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com

#3Andrew Dunstan
andrew@dunslane.net
In reply to: Josh Berkus (#2)
Re: pg_restore --no-post-data and --post-data-only

On 08/24/2011 08:43 PM, Josh Berkus wrote:

On 8/23/11 1:30 PM, Andrew Dunstan wrote:

Attached is an undocumented patch that allows pg_restore to omit
post-data items or omit all but post-data items. This has been discussed
before, and Simon sent in a patch back on 2008, which has bitrotted
some. I'm not sure why it was dropped at the time, but I think it's time
to do this. This patch relies on some infrastructure that was added
since Simon's patch, so it works a bit differently (and more simply).

If it's not clear from Andrew's description, the purpose of this patch
is to allow dividing your pgdump into 3 portions:

1. schema
2. data
3. constraints/indexes

This allows users to implement a number of custom solutions for ad-hoc
parallel dump, conditional loading, data munging and sampled databases.
While doing so was possible before using the manifest from pg_restore
-l, the manifest approach has been complex to automate and relies on
obscure knowledge.

I have immediate production use for this patch and may be backporting it.

It's already backported, at least as far as 8.4. Check your email :-)

cheers

andrew

#4Jim Nasby
jim@nasby.net
In reply to: Josh Berkus (#2)
Re: pg_restore --no-post-data and --post-data-only

On Aug 24, 2011, at 7:43 PM, Josh Berkus wrote:

On 8/23/11 1:30 PM, Andrew Dunstan wrote:

Attached is an undocumented patch that allows pg_restore to omit
post-data items or omit all but post-data items. This has been discussed
before, and Simon sent in a patch back on 2008, which has bitrotted
some. I'm not sure why it was dropped at the time, but I think it's time
to do this. This patch relies on some infrastructure that was added
since Simon's patch, so it works a bit differently (and more simply).

If it's not clear from Andrew's description, the purpose of this patch
is to allow dividing your pgdump into 3 portions:

1. schema
2. data
3. constraints/indexes

This allows users to implement a number of custom solutions for ad-hoc
parallel dump, conditional loading, data munging and sampled databases.
While doing so was possible before using the manifest from pg_restore
-l, the manifest approach has been complex to automate and relies on
obscure knowledge.

I have immediate production use for this patch and may be backporting it.

FWIW, I got around this by writing a perl script that calls pg_dump -s and watches for the end of table create statements (IIRC it specifically looks for the first CREATE INDEX). The advantage to that approach is that you don't have to first create a custom format dump and then run pg_restore against that.
--
Jim C. Nasby, Database Architect jim@nasby.net
512.569.9461 (cell) http://jim.nasby.net

#5Andrew Dunstan
andrew@dunslane.net
In reply to: Jim Nasby (#4)
Re: pg_restore --no-post-data and --post-data-only

On 08/25/2011 06:05 PM, Jim Nasby wrote:

On Aug 24, 2011, at 7:43 PM, Josh Berkus wrote:

On 8/23/11 1:30 PM, Andrew Dunstan wrote:

Attached is an undocumented patch that allows pg_restore to omit
post-data items or omit all but post-data items. This has been discussed
before, and Simon sent in a patch back on 2008, which has bitrotted
some. I'm not sure why it was dropped at the time, but I think it's time
to do this. This patch relies on some infrastructure that was added
since Simon's patch, so it works a bit differently (and more simply).

If it's not clear from Andrew's description, the purpose of this patch
is to allow dividing your pgdump into 3 portions:

1. schema
2. data
3. constraints/indexes

This allows users to implement a number of custom solutions for ad-hoc
parallel dump, conditional loading, data munging and sampled databases.
While doing so was possible before using the manifest from pg_restore
-l, the manifest approach has been complex to automate and relies on
obscure knowledge.

I have immediate production use for this patch and may be backporting it.

FWIW, I got around this by writing a perl script that calls pg_dump -s and watches for the end of table create statements (IIRC it specifically looks for the first CREATE INDEX). The advantage to that approach is that you don't have to first create a custom format dump and then run pg_restore against that.

Well, notwithstanding my well known love of perl, that strikes me as
spending a pound to save a penny. And custom format dumps rock ;-) Also,
your recipe above is buggy, BTW. A CREATE INDEX statement might well not
be the first item in the post-data section.

But we could also add these switches to pg_dump too if people feel it's
worthwhile. I haven't looked but the logic should not be terribly hard.

cheers

andrew

#6Andrew Dunstan
andrew@dunslane.net
In reply to: Andrew Dunstan (#5)
1 attachment(s)
Re: pg_restore --no-post-data and --post-data-only

On 08/25/2011 06:15 PM, Andrew Dunstan wrote:

But we could also add these switches to pg_dump too if people feel
it's worthwhile. I haven't looked but the logic should not be terribly
hard.

Something like the attached, in fact, which seems pretty simple.

cheers

andrew

Attachments:

exclude-post-data-steps_pg_dump.patchtext/x-patch; name=exclude-post-data-steps_pg_dump.patchDownload
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index f6cd7eb..e9b4cc6 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -140,6 +140,8 @@ static int	column_inserts = 0;
 static int	no_security_labels = 0;
 static int	no_unlogged_table_data = 0;
 static int	serializable_deferrable = 0;
+static int  exclude_post_data = 0;
+static int  post_data_only = 0;
 
 
 static void help(const char *progname);
@@ -334,6 +336,8 @@ main(int argc, char **argv)
 		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
 		{"no-security-labels", no_argument, &no_security_labels, 1},
 		{"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1},
+		{"no-post-data", no_argument, &exclude_post_data, 1},
+		{"post-data-only", no_argument, &post_data_only, 1},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -790,7 +794,7 @@ main(int argc, char **argv)
 	dumpStdStrings(g_fout);
 
 	/* The database item is always next, unless we don't want it at all */
-	if (include_everything && !dataOnly)
+	if (include_everything && !dataOnly && !post_data_only)
 		dumpDatabase(g_fout);
 
 	/* Now the rearrangeable objects. */
@@ -876,6 +880,8 @@ help(const char *progname)
 	printf(_("  --no-unlogged-table-data    do not dump unlogged table data\n"));
 	printf(_("  --quote-all-identifiers     quote all identifiers, even if not key words\n"));
 	printf(_("  --serializable-deferrable   wait until the dump can run without anomalies\n"));
+	printf(_("  --no-post-data              do not dump constraints, indexes, rules, triggers\n"));
+	printf(_("  --post-data-only            only dump constraints, indexes, rules, triggers\n"));
 	printf(_("  --use-set-session-authorization\n"
 			 "                              use SET SESSION AUTHORIZATION commands instead of\n"
 	"                              ALTER OWNER commands to set ownership\n"));
@@ -7023,6 +7029,25 @@ collectComments(Archive *fout, CommentItem **items)
 static void
 dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 {
+
+	int skip = 0;
+
+	switch (dobj->objType)
+	{
+		case DO_INDEX:
+		case DO_TRIGGER:
+		case DO_CONSTRAINT:
+		case DO_FK_CONSTRAINT:
+		case DO_RULE:
+			skip = exclude_post_data;
+			break;
+		default:
+			skip = post_data_only;
+	}
+
+	if (skip)
+		return;
+	
 	switch (dobj->objType)
 	{
 		case DO_NAMESPACE:
#7Robert Haas
robertmhaas@gmail.com
In reply to: Andrew Dunstan (#6)
Re: pg_restore --no-post-data and --post-data-only

On Fri, Aug 26, 2011 at 11:22 AM, Andrew Dunstan <andrew@dunslane.net> wrote:

But we could also add these switches to pg_dump too if people feel it's
worthwhile. I haven't looked but the logic should not be terribly hard.

Something like the attached, in fact, which seems pretty simple.

It seems like there are three sets of things you might want here:
pre-data, data, post-data. So in the end we could end up with:

--pre-data-only
--post-data-only
--data-only
--no-pre-data
--no-post-data
--no-data

And then maybe someone will want just the create index commands and
not the constraint commands. It seems like it might be more elegant
to come up with a single switch where you can list which things you
want:

--sections='predata data'
--sections='postdata'
--sections='index'

Just thinking out loud....

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#8Jeff Davis
pgsql@j-davis.com
In reply to: Robert Haas (#7)
Re: pg_restore --no-post-data and --post-data-only

On Fri, 2011-08-26 at 12:46 -0400, Robert Haas wrote:

--sections='predata data'
--sections='postdata'
--sections='index'

Agreed. After command line options reach a certain level of complexity,
I think it's worth looking for a more general way to express them.

Regards,
Jeff Davis

#9Andrew Dunstan
andrew@dunslane.net
In reply to: Robert Haas (#7)
Re: pg_restore --no-post-data and --post-data-only

On 08/26/2011 12:46 PM, Robert Haas wrote:

On Fri, Aug 26, 2011 at 11:22 AM, Andrew Dunstan<andrew@dunslane.net> wrote:

But we could also add these switches to pg_dump too if people feel it's
worthwhile. I haven't looked but the logic should not be terribly hard.

Something like the attached, in fact, which seems pretty simple.

It seems like there are three sets of things you might want here:
pre-data, data, post-data. So in the end we could end up with:

--pre-data-only
--post-data-only
--data-only
--no-pre-data
--no-post-data
--no-data

And then maybe someone will want just the create index commands and
not the constraint commands. It seems like it might be more elegant
to come up with a single switch where you can list which things you
want:

--sections='predata data'
--sections='postdata'
--sections='index'

Just thinking out loud....

I knew there would be some bike-shedding about how we specify these
things, which is why I haven't written docs yet.

All the possibilities you specify except for the indexes section can be
done by using these switches in combination with -s and -a.

For anything more fine-grained, I'm inclined to say that people need to
roll their own. pg_restore's --list and --use-list give you extremely
fine-grained control. I have working scripts which use these for example
to filter out londiste and pgq objects, certain large tables, audit
objects and more. As an example of the complexity I think we should
avoid, which section would UNIQUE and PRIMARY KEY constraints belong in?
"constraints" because that's what they are, or "indexes" because that's
what they create? No matter which answer you choose someone will claim
you have violated POLA.

Chopping things into pre-data, data and post-data would get us around
99% of the cases we could reasonably provide for in my experience. That
seems enough :-)

I don't have anything in principle against your '--sections="foo bar"'
suggestion, but it would be more work to program. Simpler, and probably
more consistent with how we do other things, would be allowing multiple
--section options, if we don't want to have named options such as I have
provided.

cheers

andrew

#10Robert Haas
robertmhaas@gmail.com
In reply to: Andrew Dunstan (#9)
Re: pg_restore --no-post-data and --post-data-only

On Fri, Aug 26, 2011 at 1:15 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

I don't have anything in principle against your '--sections="foo bar"'
suggestion, but it would be more work to program. Simpler, and probably more
consistent with how we do other things, would be allowing multiple --section
options, if we don't want to have named options such as I have provided.

I wouldn't object to that, but "more work to program" probably means
about an extra 10 lines of code in this particular case.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#11Alvaro Herrera
alvherre@commandprompt.com
In reply to: Robert Haas (#10)
Re: pg_restore --no-post-data and --post-data-only

Excerpts from Robert Haas's message of vie ago 26 15:36:36 -0300 2011:

On Fri, Aug 26, 2011 at 1:15 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

I don't have anything in principle against your '--sections="foo bar"'
suggestion, but it would be more work to program. Simpler, and probably more
consistent with how we do other things, would be allowing multiple --section
options, if we don't want to have named options such as I have provided.

I wouldn't object to that, but "more work to program" probably means
about an extra 10 lines of code in this particular case.

The "--section=data --section=indexes" proposal seems very reasonable to
me -- more so than "--sections='data indexes'".

--
Álvaro Herrera <alvherre@commandprompt.com>
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

#12Jim Nasby
jim@nasby.net
In reply to: Andrew Dunstan (#9)
Re: pg_restore --no-post-data and --post-data-only

On Aug 26, 2011, at 12:15 PM, Andrew Dunstan wrote:

I knew there would be some bike-shedding about how we specify these things, which is why I haven't written docs yet.

While we're debating what shade of yellow to paint the shed...

My actual use case is to be able to be able to "inject" SQL into a SQL-formatted dump either pre- or post-data (I'm on 8.3, so I don't actually dump any data; I'm *mostly* emulating the ability to dump data on just certain tables).

So for what I'm doing, the ideal interface would be a way to tell pg_dump "When you're done dumping all table structures but before you get to any constraints, please run $COMMAND and inject it's output into the dump output." For some of the data obfuscation we're doing it would be easiest if $COMMAND was a perl script instead of SQL, but we could probably convert it.

Of course, many other folks actually need the ability to just spit out specific portions of the dump; I'm hoping we can come up with something that supports both concepts.
--
Jim C. Nasby, Database Architect jim@nasby.net
512.569.9461 (cell) http://jim.nasby.net

#13Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alvaro Herrera (#11)
Re: pg_restore --no-post-data and --post-data-only

Alvaro Herrera <alvherre@commandprompt.com> writes:

The "--section=data --section=indexes" proposal seems very reasonable to
me -- more so than "--sections='data indexes'".

+1 ... not only easier to code and less squishily defined, but more like
the existing precedent for other pg_dump switches, such as --table.

regards, tom lane

#14Andrew Dunstan
andrew@dunslane.net
In reply to: Jim Nasby (#12)
Re: pg_restore --no-post-data and --post-data-only

On 08/26/2011 04:46 PM, Jim Nasby wrote:

On Aug 26, 2011, at 12:15 PM, Andrew Dunstan wrote:

I knew there would be some bike-shedding about how we specify these things, which is why I haven't written docs yet.

While we're debating what shade of yellow to paint the shed...

My actual use case is to be able to be able to "inject" SQL into a SQL-formatted dump either pre- or post-data (I'm on 8.3, so I don't actually dump any data; I'm *mostly* emulating the ability to dump data on just certain tables).

So for what I'm doing, the ideal interface would be a way to tell pg_dump "When you're done dumping all table structures but before you get to any constraints, please run $COMMAND and inject it's output into the dump output." For some of the data obfuscation we're doing it would be easiest if $COMMAND was a perl script instead of SQL, but we could probably convert it.

Of course, many other folks actually need the ability to just spit out specific portions of the dump; I'm hoping we can come up with something that supports both concepts.

Well, the Unix approach is to use tools that do one thing well to build
up more complex tools. Making pg_dump run some external command to
inject things into the stream seems like the wrong thing given this
philosophy. Use pg_dump to get the bits you want (pre-data, post-data)
and sandwich them around whatever else you want. As for getting data
from just certain tables, I just posted a patch for pg_dump to exclude
data for certain tables, and we could look at providing a positive as
well as a negative filter if there is sufficient demand.

cheers

andrew

#15Dimitri Fontaine
dimitri@2ndQuadrant.fr
In reply to: Andrew Dunstan (#9)
Re: pg_restore --no-post-data and --post-data-only

Andrew Dunstan <andrew@dunslane.net> writes:

For anything more fine-grained, I'm inclined to say that people need to roll
their own. pg_restore's --list and --use-list give you extremely
fine-grained control. I have working scripts which use these for example to
filter out londiste and pgq objects, certain large tables, audit objects and

Which is exactly the core features of pg_staging, that builds schema
whitelist and schema_nodata options on top of pg_restore listing. The
only complex thing here is to be able to filter out triggers using a
function defined in a schema you're filtering out, but pg_staging has
support for that.

http://tapoueh.org/pgsql/pgstaging.html
https://github.com/dimitri/pg_staging
http://tapoueh.org/blog/2011/03/29-towards-pg_staging-10.html

And you can also only use the pg_restore listing commands of pg_staging
without having to do the full installation of its features. Will write
some article about how to use it for only catalog listing purpose,
without its infrastructure for fetching backups and managing dev staging
environments.

I don't have anything in principle against your '--sections="foo bar"'
suggestion, but it would be more work to program. Simpler, and probably more
consistent with how we do other things, would be allowing multiple --section
options, if we don't want to have named options such as I have provided.

+1 for --section foo --section bar.

Regards,
--
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

#16Greg Sabino Mullane
greg@turnstep.com
In reply to: Andrew Dunstan (#5)
Re: pg_restore --no-post-data and --post-data-only

-----BEGIN PGP SIGNED MESSAGE-----
Hash: RIPEMD160

Well, notwithstanding my well known love of perl, that strikes me as
spending a pound to save a penny. And custom format dumps rock ;-) Also,
your recipe above is buggy, BTW. A CREATE INDEX statement might well not
be the first item in the post-data section.

But we could also add these switches to pg_dump too if people feel it's
worthwhile. I haven't looked but the logic should not be terribly hard.

A big +1 to --pre-data and --post-data, but until we get there, or
if you have an existing dump file (schema *or* schema+data) that needs
parsing, there is an existing tool:

http://blog.endpoint.com/2010/01/splitting-postgres-pgdump-into-pre-and.html

Once these new flags and the ability to custom format dump pg_dumpall
is done, I'll have very little left to complain about with pg_dump :)

- --
Greg Sabino Mullane greg@turnstep.com
End Point Corporation http://www.endpoint.com/
PGP Key: 0x14964AC8 201108271855
http://biglumber.com/x/web?pk=2529DF6AB8F79407E94445B4BC9B906714964AC8

-----BEGIN PGP SIGNATURE-----

iEYEAREDAAYFAk5ZdfwACgkQvJuQZxSWSsipDQCgpmNtD/I/2gfAzm2b3jouD8nS
qhgAn33t5VLiF8HeslBwCqyMzQJy6VN5
=PfK7
-----END PGP SIGNATURE-----

#17Andrew Dunstan
andrew@dunslane.net
In reply to: Greg Sabino Mullane (#16)
Re: pg_restore --no-post-data and --post-data-only

On 08/27/2011 06:56 PM, Greg Sabino Mullane wrote:

Once these new flags and the ability to custom format dump pg_dumpall
is done, I'll have very little left to complain about with pg_dump :)

It's off topic. But I think custom format would require a major mangling
to be able to handle a complete cluster. This isn't just a simple matter
of programming, IMNSHO.

cheers

andrew

#18Jim Nasby
jim@nasby.net
In reply to: Andrew Dunstan (#14)
Re: pg_restore --no-post-data and --post-data-only

On Aug 26, 2011, at 5:23 PM, Andrew Dunstan wrote:

On 08/26/2011 04:46 PM, Jim Nasby wrote:

On Aug 26, 2011, at 12:15 PM, Andrew Dunstan wrote:

I knew there would be some bike-shedding about how we specify these things, which is why I haven't written docs yet.

While we're debating what shade of yellow to paint the shed...

My actual use case is to be able to be able to "inject" SQL into a SQL-formatted dump either pre- or post-data (I'm on 8.3, so I don't actually dump any data; I'm *mostly* emulating the ability to dump data on just certain tables).

So for what I'm doing, the ideal interface would be a way to tell pg_dump "When you're done dumping all table structures but before you get to any constraints, please run $COMMAND and inject it's output into the dump output." For some of the data obfuscation we're doing it would be easiest if $COMMAND was a perl script instead of SQL, but we could probably convert it.

Of course, many other folks actually need the ability to just spit out specific portions of the dump; I'm hoping we can come up with something that supports both concepts.

Well, the Unix approach is to use tools that do one thing well to build up more complex tools. Making pg_dump run some external command to inject things into the stream seems like the wrong thing given this philosophy. Use pg_dump to get the bits you want (pre-data, post-data) and sandwich them around whatever else you want.

I agree... except for one little niggling concern: If pg_dump is injecting something, then the DDL is being grabbed with a single, consistent snapshot. --pre and --post do not get you that (though we could probably use the new ability to export snapshots to fix that...)

As for getting data from just certain tables, I just posted a patch for pg_dump to exclude data for certain tables, and we could look at providing a positive as well as a negative filter if there is sufficient demand.

Unfortunately some of the dumped data needs to be sanitized, so that won't work unless I can also dump an arbitrary SELECT. But yes, a positive filter would definitely be welcome.
--
Jim C. Nasby, Database Architect jim@nasby.net
512.569.9461 (cell) http://jim.nasby.net

#19Alvaro Herrera
alvherre@commandprompt.com
In reply to: Jim Nasby (#18)
Re: pg_restore --no-post-data and --post-data-only

Excerpts from Jim Nasby's message of mié ago 31 16:45:59 -0300 2011:

On Aug 26, 2011, at 5:23 PM, Andrew Dunstan wrote:

On 08/26/2011 04:46 PM, Jim Nasby wrote:

On Aug 26, 2011, at 12:15 PM, Andrew Dunstan wrote:

I knew there would be some bike-shedding about how we specify these things, which is why I haven't written docs yet.

While we're debating what shade of yellow to paint the shed...

My actual use case is to be able to be able to "inject" SQL into a SQL-formatted dump either pre- or post-data (I'm on 8.3, so I don't actually dump any data; I'm *mostly* emulating the ability to dump data on just certain tables).

So for what I'm doing, the ideal interface would be a way to tell pg_dump "When you're done dumping all table structures but before you get to any constraints, please run $COMMAND and inject it's output into the dump output." For some of the data obfuscation we're doing it would be easiest if $COMMAND was a perl script instead of SQL, but we could probably convert it.

Of course, many other folks actually need the ability to just spit out specific portions of the dump; I'm hoping we can come up with something that supports both concepts.

Well, the Unix approach is to use tools that do one thing well to build up more complex tools. Making pg_dump run some external command to inject things into the stream seems like the wrong thing given this philosophy. Use pg_dump to get the bits you want (pre-data, post-data) and sandwich them around whatever else you want.

I agree... except for one little niggling concern: If pg_dump is injecting something, then the DDL is being grabbed with a single, consistent snapshot. --pre and --post do not get you that (though we could probably use the new ability to export snapshots to fix that...)

Eh, --pre and --post are pg_restore flags, so you already have a
consistent snapshot.

--
Álvaro Herrera <alvherre@commandprompt.com>
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

#20Andrew Dunstan
andrew@dunslane.net
In reply to: Alvaro Herrera (#19)
Re: pg_restore --no-post-data and --post-data-only

On 08/31/2011 04:03 PM, Alvaro Herrera wrote:

Well, the Unix approach is to use tools that do one thing well to build up more complex tools. Making pg_dump run some external command to inject things into the stream seems like the wrong thing given this philosophy. Use pg_dump to get the bits you want (pre-data, post-data) and sandwich them around whatever else you want.

I agree... except for one little niggling concern: If pg_dump is injecting something, then the DDL is being grabbed with a single, consistent snapshot. --pre and --post do not get you that (though we could probably use the new ability to export snapshots to fix that...)

Eh, --pre and --post are pg_restore flags, so you already have a
consistent snapshot.

We've been talking about adding them for pg_dump too.

I take Jim's point about the snapshot, but I still don't feel it's a
good reason to allow some arbitrary code or script to be run between
them (and after all, it's not likely to run with the same snapshot anyway).

cheers

andrew

#21Greg Sabino Mullane
greg@turnstep.com
In reply to: Andrew Dunstan (#17)
Re: pg_restore --no-post-data and --post-data-only

-----BEGIN PGP SIGNED MESSAGE-----
Hash: RIPEMD160

It's off topic. But I think custom format would require a major mangling
to be able to handle a complete cluster. This isn't just a simple matter
of programming, IMNSHO.

Oh, I meant just having it create separate custom format files for each
database. As shell scripts all over the world have been doing for years,
but it would be nice if it was simply built in.

- --
Greg Sabino Mullane greg@turnstep.com
End Point Corporation http://www.endpoint.com/
PGP Key: 0x14964AC8 201109012139
http://biglumber.com/x/web?pk=2529DF6AB8F79407E94445B4BC9B906714964AC8
-----BEGIN PGP SIGNATURE-----

iEYEAREDAAYFAk5gM+oACgkQvJuQZxSWSsi+xgCfbr0q+Ilbw0JRsORLZN2pSz1r
JtcAoJaleZvW/wWtU83d9MVeOes4I6+0
=VqFQ
-----END PGP SIGNATURE-----

#22Dimitri Fontaine
dimitri@2ndQuadrant.fr
In reply to: Greg Sabino Mullane (#21)
Re: pg_restore --no-post-data and --post-data-only

"Greg Sabino Mullane" <greg@turnstep.com> writes:

It's off topic. But I think custom format would require a major mangling
to be able to handle a complete cluster. This isn't just a simple matter
of programming, IMNSHO.

Oh, I meant just having it create separate custom format files for each
database. As shell scripts all over the world have been doing for years,
but it would be nice if it was simply built in.

+1
-- 
Dimitri Fontaine
http://2ndQuadrant.fr     PostgreSQL : Expertise, Formation et Support
#23Andrew Dunstan
andrew@dunslane.net
In reply to: Greg Sabino Mullane (#21)
Re: pg_restore --no-post-data and --post-data-only

On 09/01/2011 09:40 PM, Greg Sabino Mullane wrote:

It's off topic. But I think custom format would require a major mangling
to be able to handle a complete cluster. This isn't just a simple matter
of programming, IMNSHO.

Oh, I meant just having it create separate custom format files for each
database. As shell scripts all over the world have been doing for years,
but it would be nice if it was simply built in.

I guess it could be done, although I'm not going to do it :-) I'm more
about making somewhat hard things easier than easy things slightly
easier :-) You'd have to invent some sort of way to name files, possibly
by supplying a template to the -f parameter which would fill in some
placeholder, say a %, with the name of the database. Of course, then
you'd have to make sure the database name didn't contain any forbidden
characters.

cheers

andrew

#24Dimitri Fontaine
dimitri@2ndQuadrant.fr
In reply to: Andrew Dunstan (#23)
Re: pg_restore --no-post-data and --post-data-only

Andrew Dunstan <andrew@dunslane.net> writes:

Oh, I meant just having it create separate custom format files for each
database. As shell scripts all over the world have been doing for years,
but it would be nice if it was simply built in.

I guess it could be done, although I'm not going to do it :-) I'm more about
making somewhat hard things easier than easy things slightly easier :-)

Then what about issuing an archive (tar or ar format here) containing
one custom file per database plus the globals file, SQL, plus maybe a
database listing, and hacking pg_restore so that it knows what to do
with such an input ?

Bonus points if that supports the current -l and -L options, of course.

Regards,
--
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

#25Andrew Dunstan
andrew@dunslane.net
In reply to: Dimitri Fontaine (#24)
Re: pg_restore --no-post-data and --post-data-only

On 09/03/2011 04:49 PM, Dimitri Fontaine wrote:

Andrew Dunstan<andrew@dunslane.net> writes:

Oh, I meant just having it create separate custom format files for each
database. As shell scripts all over the world have been doing for years,
but it would be nice if it was simply built in.

I guess it could be done, although I'm not going to do it :-) I'm more about
making somewhat hard things easier than easy things slightly easier :-)

Then what about issuing an archive (tar or ar format here) containing
one custom file per database plus the globals file, SQL, plus maybe a
database listing, and hacking pg_restore so that it knows what to do
with such an input ?

Bonus points if that supports the current -l and -L options, of course.

That's probably a lot of code for a little benefit, at least from my
POV, but others might find it useful.

cheers

andrew

#26Andrew Dunstan
andrew@dunslane.net
In reply to: Tom Lane (#13)
1 attachment(s)
Re: pg_restore --no-post-data and --post-data-only

On 08/26/2011 05:11 PM, Tom Lane wrote:

Alvaro Herrera<alvherre@commandprompt.com> writes:

The "--section=data --section=indexes" proposal seems very reasonable to
me -- more so than "--sections='data indexes'".

+1 ... not only easier to code and less squishily defined, but more like
the existing precedent for other pg_dump switches, such as --table.

Here is a patch for that for pg_dump. The sections provided for are
pre-data, data and post-data, as discussed elsewhere. I still feel that
anything finer grained should be handled via pg_restore's --use-list
functionality. I'll provide a patch to do the same switch for pg_restore
shortly.

Adding to the commitfest.

cheers

andrew

Attachments:

pg_dump_sections.patchtext/x-patch; name=pg_dump_sections.patchDownload
*** a/doc/src/sgml/ref/pg_dump.sgml
--- b/doc/src/sgml/ref/pg_dump.sgml
***************
*** 116,124 **** PostgreSQL documentation
         </para>
  
         <para>
!         This option is only meaningful for the plain-text format.  For
!         the archive formats, you can specify the option when you
!         call <command>pg_restore</command>.
         </para>
        </listitem>
       </varlistentry>
--- 116,122 ----
         </para>
  
         <para>
! 		This option is equivalent to specifying <option>--section=data</>.
         </para>
        </listitem>
       </varlistentry>
***************
*** 404,413 **** PostgreSQL documentation
--- 402,431 ----
         <para>
          Dump only the object definitions (schema), not data.
         </para>
+        <para>
+ 		This option is equivalent to specifying 
+ 		<option>--section=pre-data --section=post-data</>.
+        </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
+ 	   <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
+ 	   <listitem>
+ 		 <para>
+ 		   Only dump the named section. The name can be one of <option>pre-data</>, <option>data</> 
+            and <option>post-data</>. 
+ 		   This option can be specified more than once. The default is to dump all sections.
+ 		 </para>
+          <para>
+ 		   Post-data items consist of definitions of indexes, triggers, rules 
+ 		   and constraints other than check constraints. 
+ 		   Pre-data items consist of all other data definition items.
+ 		 </para>
+ 	   </listitem>
+ 	 </varlistentry>
+ 
+      <varlistentry>
        <term><option>-S <replaceable class="parameter">username</replaceable></option></term>
        <term><option>--superuser=<replaceable class="parameter">username</replaceable></option></term>
        <listitem>
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
***************
*** 82,87 **** typedef struct
--- 82,96 ----
  	int			objsubid;		/* subobject (table column #) */
  } SecLabelItem;
  
+ typedef enum 
+ {
+ 	DUMP_PRE_DATA = 0x01,
+ 	DUMP_DATA = 0x02,
+ 	DUMP_POST_DATA = 0x04,
+ 	DUMP_UNSECTIONED = 0xff
+ } DumpSections;
+ 
+ 
  /* global decls */
  bool		g_verbose;			/* User wants verbose narration of our
  								 * activities. */
***************
*** 91,96 **** PGconn	   *g_conn;				/* the database connection */
--- 100,106 ----
  /* various user-settable parameters */
  bool		schemaOnly;
  bool		dataOnly;
+ int         dumpSections; /* bitmask of chosen sections */
  bool		aclsSkip;
  const char *lockWaitTimeout;
  
***************
*** 247,253 **** static const char *fmtCopyColumnList(const TableInfo *ti);
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
! 
  
  int
  main(int argc, char **argv)
--- 257,263 ----
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
! static void set_section(const char *arg);
  
  int
  main(int argc, char **argv)
***************
*** 330,335 **** main(int argc, char **argv)
--- 340,346 ----
  		{"quote-all-identifiers", no_argument, &quote_all_identifiers, 1},
  		{"role", required_argument, NULL, 3},
  		{"serializable-deferrable", no_argument, &serializable_deferrable, 1},
+ 		{"section", required_argument, NULL, 5},
  		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
  		{"no-security-labels", no_argument, &no_security_labels, 1},
  		{"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1},
***************
*** 346,351 **** main(int argc, char **argv)
--- 357,363 ----
  	strcpy(g_opaque_type, "opaque");
  
  	dataOnly = schemaOnly = false;
+ 	dumpSections = DUMP_UNSECTIONED;
  	lockWaitTimeout = NULL;
  
  	progname = get_progname(argv[0]);
***************
*** 487,492 **** main(int argc, char **argv)
--- 499,508 ----
  				use_role = optarg;
  				break;
  
+ 			case 5:				/* section */
+ 				set_section(optarg);
+ 				break;
+ 
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 517,522 **** main(int argc, char **argv)
--- 533,554 ----
  		exit(1);
  	}
  
+ 	if ((dataOnly || schemaOnly) && dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		write_msg(NULL, "options -s/--schema-only and -a/--data-only cannot be used with --section\n");
+ 		exit(1);
+ 	}
+ 	
+ 	if (dataOnly)
+ 		dumpSections = DUMP_DATA;
+ 	else if (schemaOnly)
+ 		dumpSections = DUMP_PRE_DATA | DUMP_POST_DATA;
+ 	else if ( dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		dataOnly = dumpSections == DUMP_DATA;
+ 		schemaOnly = !(dumpSections & DUMP_DATA);
+ 	}
+ 
  	if (dataOnly && outputClean)
  	{
  		write_msg(NULL, "options -c/--clean and -a/--data-only cannot be used together\n");
***************
*** 859,864 **** help(const char *progname)
--- 891,897 ----
  	printf(_("  --no-tablespaces            do not dump tablespace assignments\n"));
  	printf(_("  --no-unlogged-table-data    do not dump unlogged table data\n"));
  	printf(_("  --quote-all-identifiers     quote all identifiers, even if not key words\n"));
+ 	printf(_("  --section=SECTION           dump named section (pre-data, data or post-data\n"));
  	printf(_("  --serializable-deferrable   wait until the dump can run without anomalies\n"));
  	printf(_("  --use-set-session-authorization\n"
  			 "                              use SET SESSION AUTHORIZATION commands instead of\n"
***************
*** 7038,7043 **** collectComments(Archive *fout, CommentItem **items)
--- 7071,7098 ----
  static void
  dumpDumpableObject(Archive *fout, DumpableObject *dobj)
  {
+ 
+ 	int skip = 0;
+ 
+ 	switch (dobj->objType)
+ 	{
+ 		case DO_INDEX:
+ 		case DO_TRIGGER:
+ 		case DO_CONSTRAINT:
+ 		case DO_FK_CONSTRAINT:
+ 		case DO_RULE:
+ 			skip = !(dumpSections & DUMP_POST_DATA);
+ 			break;
+ 		case DO_TABLE_DATA:
+ 			skip = !(dumpSections & DUMP_DATA);
+ 			break;
+ 		default:
+ 			skip = !(dumpSections & DUMP_PRE_DATA);
+ 	}
+ 
+ 	if (skip)
+ 		return;
+    
  	switch (dobj->objType)
  	{
  		case DO_NAMESPACE:
***************
*** 14402,14404 **** check_sql_result(PGresult *res, PGconn *conn, const char *query,
--- 14457,14481 ----
  	write_msg(NULL, "The command was: %s\n", query);
  	exit_nicely();
  }
+ 
+ static void set_section (const char *arg)
+ {
+ 	/* if this is the first, clear all the bits */
+ 	if (dumpSections == DUMP_UNSECTIONED)
+ 		dumpSections = 0; 
+ 	
+ 	if (strcmp(arg,"pre-data") == 0)
+ 		dumpSections |= DUMP_PRE_DATA;
+ 	else if (strcmp(arg,"data") == 0)
+ 		dumpSections |= DUMP_DATA;
+ 	else if (strcmp(arg,"post-data") == 0)
+ 		dumpSections |= DUMP_POST_DATA;
+ 	else
+ 	{
+ 		fprintf(stderr, _("%s: unknown section name \"%s\")\n"),
+ 				progname, arg);
+ 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ 				progname);
+ 		exit(1);
+ 	}
+ }
#27Matteo Beccati
php@beccati.com
In reply to: Andrew Dunstan (#26)
Re: pg_restore --no-post-data and --post-data-only

Hi Andrew,

On 13/11/2011 02:56, Andrew Dunstan wrote:

Here is a patch for that for pg_dump. The sections provided for are
pre-data, data and post-data, as discussed elsewhere. I still feel that
anything finer grained should be handled via pg_restore's --use-list
functionality. I'll provide a patch to do the same switch for pg_restore
shortly.

Adding to the commitfest.

FWIW, I've tested the patch as I've recently needed to build a custom
splitting script for a project and the patch seemed to be a much more
elegant solution. As far as I can tell, it works great and the output
matches the result of my script.

The only little thing I've noticed is a missing ending ")" in the --help
message.

Cheers
--
Matteo Beccati

Development & Consulting - http://www.beccati.com/

#28Andrew Dunstan
andrew@dunslane.net
In reply to: Andrew Dunstan (#26)
1 attachment(s)
Re: pg_restore --no-post-data and --post-data-only

On Sat, November 12, 2011 8:56 pm, Andrew Dunstan wrote:

On 08/26/2011 05:11 PM, Tom Lane wrote:

Alvaro Herrera<alvherre@commandprompt.com> writes:

The "--section=data --section=indexes" proposal seems very reasonable
to
me -- more so than "--sections='data indexes'".

+1 ... not only easier to code and less squishily defined, but more like
the existing precedent for other pg_dump switches, such as --table.

Here is a patch for that for pg_dump. The sections provided for are
pre-data, data and post-data, as discussed elsewhere. I still feel that
anything finer grained should be handled via pg_restore's --use-list
functionality. I'll provide a patch to do the same switch for pg_restore
shortly.

Adding to the commitfest.

Updated version with pg_restore included is attached.

cheers

andrew

Attachments:

dump_sections.patchtext/x-patch; charset=iso-8859-1; name=dump_sections.patchDownload
*** a/doc/src/sgml/ref/pg_dump.sgml
--- b/doc/src/sgml/ref/pg_dump.sgml
***************
*** 116,124 **** PostgreSQL documentation
         </para>
  
         <para>
!         This option is only meaningful for the plain-text format.  For
!         the archive formats, you can specify the option when you
!         call <command>pg_restore</command>.
         </para>
        </listitem>
       </varlistentry>
--- 116,122 ----
         </para>
  
         <para>
! 		This option is equivalent to specifying <option>--section=data</>.
         </para>
        </listitem>
       </varlistentry>
***************
*** 404,413 **** PostgreSQL documentation
--- 402,431 ----
         <para>
          Dump only the object definitions (schema), not data.
         </para>
+        <para>
+ 		This option is equivalent to specifying 
+ 		<option>--section=pre-data --section=post-data</>.
+        </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
+ 	   <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
+ 	   <listitem>
+ 		 <para>
+ 		   Only dump the named section. The name can be one of <option>pre-data</>, <option>data</> 
+            and <option>post-data</>. 
+ 		   This option can be specified more than once. The default is to dump all sections.
+ 		 </para>
+          <para>
+ 		   Post-data items consist of definitions of indexes, triggers, rules 
+ 		   and constraints other than check constraints. 
+ 		   Pre-data items consist of all other data definition items.
+ 		 </para>
+ 	   </listitem>
+ 	 </varlistentry>
+ 
+      <varlistentry>
        <term><option>-S <replaceable class="parameter">username</replaceable></option></term>
        <term><option>--superuser=<replaceable class="parameter">username</replaceable></option></term>
        <listitem>
*** a/doc/src/sgml/ref/pg_restore.sgml
--- b/doc/src/sgml/ref/pg_restore.sgml
***************
*** 93,98 ****
--- 93,101 ----
         <para>
          Restore only the data, not the schema (data definitions).
         </para>
+        <para>
+ 		This option is equivalent to specifying <option>--section=data</>.
+        </para>
        </listitem>
       </varlistentry>
  
***************
*** 359,364 ****
--- 362,371 ----
          (Do not confuse this with the <option>--schema</> option, which
          uses the word <quote>schema</> in a different meaning.)
         </para>
+        <para>
+ 		This option is equivalent to specifying 
+ 		<option>--section=pre-data --section=post-data</>.
+        </para>
        </listitem>
       </varlistentry>
  
***************
*** 505,510 ****
--- 512,533 ----
       </varlistentry>
  
       <varlistentry>
+ 	   <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
+ 	   <listitem>
+ 		 <para>
+ 		   Only restore the named section. The name can be one of <option>pre-data</>, <option>data</> 
+            and <option>post-data</>. 
+ 		   This option can be specified more than once. The default is to restore all sections.
+ 		 </para>
+          <para>
+ 		   Post-data items consist of definitions of indexes, triggers, rules 
+ 		   and constraints other than check constraints. 
+ 		   Pre-data items consist of all other data definition items.
+ 		 </para>
+ 	   </listitem>
+ 	 </varlistentry>
+ 
+      <varlistentry>
        <term><option>--use-set-session-authorization</option></term>
        <listitem>
         <para>
*** a/src/bin/pg_dump/pg_backup.h
--- b/src/bin/pg_dump/pg_backup.h
***************
*** 69,74 **** typedef enum _teSection
--- 69,82 ----
  	SECTION_POST_DATA			/* stuff to be processed after data */
  } teSection;
  
+ typedef enum 
+ {
+ 	DUMP_PRE_DATA = 0x01,
+ 	DUMP_DATA = 0x02,
+ 	DUMP_POST_DATA = 0x04,
+ 	DUMP_UNSECTIONED = 0xff
+ } DumpSections;
+ 
  /*
   *	We may want to have some more user-readable data, but in the mean
   *	time this gives us some abstraction and type checking.
***************
*** 111,116 **** typedef struct _restoreOptions
--- 119,125 ----
  	int			dropSchema;
  	char	   *filename;
  	int			schemaOnly;
+ 	int         dumpSections;
  	int			verbose;
  	int			aclsSkip;
  	int			tocSummary;
*** a/src/bin/pg_dump/pg_backup_archiver.c
--- b/src/bin/pg_dump/pg_backup_archiver.c
***************
*** 665,670 **** NewRestoreOptions(void)
--- 665,671 ----
  	/* set any fields that shouldn't default to zeroes */
  	opts->format = archUnknown;
  	opts->promptPassword = TRI_DEFAULT;
+ 	opts->dumpSections = DUMP_UNSECTIONED;
  
  	return opts;
  }
***************
*** 2163,2168 **** ReadToc(ArchiveHandle *AH)
--- 2164,2170 ----
  	int			depIdx;
  	int			depSize;
  	TocEntry   *te;
+ 	bool        in_post_data = false;
  
  	AH->tocCount = ReadInt(AH);
  	AH->maxDumpId = 0;
***************
*** 2228,2233 **** ReadToc(ArchiveHandle *AH)
--- 2230,2241 ----
  				te->section = SECTION_PRE_DATA;
  		}
  
+ 		/* will stay true even for SECTION_NONE items */
+ 		if (te->section == SECTION_POST_DATA)
+ 			in_post_data = true;
+ 		
+ 		te->inPostData = in_post_data;
+ 
  		te->defn = ReadStr(AH);
  		te->dropStmt = ReadStr(AH);
  
***************
*** 2377,2382 **** _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
--- 2385,2401 ----
  	if (!ropt->createDB && strcmp(te->desc, "DATABASE") == 0)
  		return 0;
  
+ 	/* skip (all but) post data section as required */
+ 	/* table data is filtered if necessary lower down */
+ 	if (ropt->dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		if (!(ropt->dumpSections & DUMP_POST_DATA) && te->inPostData)
+ 			return 0;
+ 		if (!(ropt->dumpSections & DUMP_PRE_DATA) && ! te->inPostData && strcmp(te->desc, "TABLE DATA") != 0)
+ 			return 0;
+ 	}
+ 
+ 
  	/* Check options for selective dump/restore */
  	if (ropt->schemaNames)
  	{
*** a/src/bin/pg_dump/pg_backup_archiver.h
--- b/src/bin/pg_dump/pg_backup_archiver.h
***************
*** 287,292 **** typedef struct _tocEntry
--- 287,295 ----
  	void	   *dataDumperArg;	/* Arg for above routine */
  	void	   *formatData;		/* TOC Entry data specific to file format */
  
+ 	/* in post data? not quite the same as section, might be SECTION_NONE */
+ 	bool        inPostData;     
+ 
  	/* working state (needed only for parallel restore) */
  	struct _tocEntry *par_prev; /* list links for pending/ready items; */
  	struct _tocEntry *par_next; /* these are NULL if not in either list */
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
***************
*** 91,96 **** PGconn	   *g_conn;				/* the database connection */
--- 91,97 ----
  /* various user-settable parameters */
  bool		schemaOnly;
  bool		dataOnly;
+ int         dumpSections; /* bitmask of chosen sections */
  bool		aclsSkip;
  const char *lockWaitTimeout;
  
***************
*** 247,253 **** static const char *fmtCopyColumnList(const TableInfo *ti);
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
! 
  
  int
  main(int argc, char **argv)
--- 248,254 ----
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
! static void set_section(const char *arg);
  
  int
  main(int argc, char **argv)
***************
*** 330,335 **** main(int argc, char **argv)
--- 331,337 ----
  		{"quote-all-identifiers", no_argument, &quote_all_identifiers, 1},
  		{"role", required_argument, NULL, 3},
  		{"serializable-deferrable", no_argument, &serializable_deferrable, 1},
+ 		{"section", required_argument, NULL, 5},
  		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
  		{"no-security-labels", no_argument, &no_security_labels, 1},
  		{"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1},
***************
*** 346,351 **** main(int argc, char **argv)
--- 348,354 ----
  	strcpy(g_opaque_type, "opaque");
  
  	dataOnly = schemaOnly = false;
+ 	dumpSections = DUMP_UNSECTIONED;
  	lockWaitTimeout = NULL;
  
  	progname = get_progname(argv[0]);
***************
*** 487,492 **** main(int argc, char **argv)
--- 490,499 ----
  				use_role = optarg;
  				break;
  
+ 			case 5:				/* section */
+ 				set_section(optarg);
+ 				break;
+ 
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 517,522 **** main(int argc, char **argv)
--- 524,545 ----
  		exit(1);
  	}
  
+ 	if ((dataOnly || schemaOnly) && dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		write_msg(NULL, "options -s/--schema-only and -a/--data-only cannot be used with --section\n");
+ 		exit(1);
+ 	}
+ 	
+ 	if (dataOnly)
+ 		dumpSections = DUMP_DATA;
+ 	else if (schemaOnly)
+ 		dumpSections = DUMP_PRE_DATA | DUMP_POST_DATA;
+ 	else if ( dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		dataOnly = dumpSections == DUMP_DATA;
+ 		schemaOnly = !(dumpSections & DUMP_DATA);
+ 	}
+ 
  	if (dataOnly && outputClean)
  	{
  		write_msg(NULL, "options -c/--clean and -a/--data-only cannot be used together\n");
***************
*** 859,864 **** help(const char *progname)
--- 882,888 ----
  	printf(_("  --no-tablespaces            do not dump tablespace assignments\n"));
  	printf(_("  --no-unlogged-table-data    do not dump unlogged table data\n"));
  	printf(_("  --quote-all-identifiers     quote all identifiers, even if not key words\n"));
+ 	printf(_("  --section=SECTION           dump named section (pre-data, data or post-data)\n"));
  	printf(_("  --serializable-deferrable   wait until the dump can run without anomalies\n"));
  	printf(_("  --use-set-session-authorization\n"
  			 "                              use SET SESSION AUTHORIZATION commands instead of\n"
***************
*** 7038,7043 **** collectComments(Archive *fout, CommentItem **items)
--- 7062,7089 ----
  static void
  dumpDumpableObject(Archive *fout, DumpableObject *dobj)
  {
+ 
+ 	int skip = 0;
+ 
+ 	switch (dobj->objType)
+ 	{
+ 		case DO_INDEX:
+ 		case DO_TRIGGER:
+ 		case DO_CONSTRAINT:
+ 		case DO_FK_CONSTRAINT:
+ 		case DO_RULE:
+ 			skip = !(dumpSections & DUMP_POST_DATA);
+ 			break;
+ 		case DO_TABLE_DATA:
+ 			skip = !(dumpSections & DUMP_DATA);
+ 			break;
+ 		default:
+ 			skip = !(dumpSections & DUMP_PRE_DATA);
+ 	}
+ 
+ 	if (skip)
+ 		return;
+    
  	switch (dobj->objType)
  	{
  		case DO_NAMESPACE:
***************
*** 14402,14404 **** check_sql_result(PGresult *res, PGconn *conn, const char *query,
--- 14448,14473 ----
  	write_msg(NULL, "The command was: %s\n", query);
  	exit_nicely();
  }
+ 
+ static void 
+ set_section (const char *arg)
+ {
+ 	/* if this is the first, clear all the bits */
+ 	if (dumpSections == DUMP_UNSECTIONED)
+ 		dumpSections = 0; 
+ 	
+ 	if (strcmp(arg,"pre-data") == 0)
+ 		dumpSections |= DUMP_PRE_DATA;
+ 	else if (strcmp(arg,"data") == 0)
+ 		dumpSections |= DUMP_DATA;
+ 	else if (strcmp(arg,"post-data") == 0)
+ 		dumpSections |= DUMP_POST_DATA;
+ 	else
+ 	{
+ 		fprintf(stderr, _("%s: unknown section name \"%s\")\n"),
+ 				progname, arg);
+ 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ 				progname);
+ 		exit(1);
+ 	}
+ }
*** a/src/bin/pg_dump/pg_restore.c
--- b/src/bin/pg_dump/pg_restore.c
***************
*** 61,66 **** extern int	optind;
--- 61,67 ----
  
  
  static void usage(const char *progname);
+ static void set_section (int *dumpSections, const char *arg);
  
  typedef struct option optType;
  
***************
*** 116,121 **** main(int argc, char **argv)
--- 117,123 ----
  		{"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1},
  		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
  		{"role", required_argument, NULL, 2},
+ 		{"section", required_argument, NULL, 3},
  		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
  		{"no-security-labels", no_argument, &no_security_labels, 1},
  
***************
*** 270,275 **** main(int argc, char **argv)
--- 272,281 ----
  				opts->use_role = optarg;
  				break;
  
+ 			case 3:				/* section */
+ 				set_section(&(opts->dumpSections), optarg);
+ 				break;
+ 
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 292,297 **** main(int argc, char **argv)
--- 298,327 ----
  		exit(1);
  	}
  
+ 	if (opts->dataOnly && opts->schemaOnly)
+ 	{
+ 		fprintf(stderr, _("%s: options -s/--schema-only and -a/--data-only cannot be used together\n"),
+ 			progname);
+ 		exit(1);
+ 	}
+ 
+ 	if ((opts->dataOnly || opts->schemaOnly) && (opts->dumpSections != DUMP_UNSECTIONED))
+ 	{
+ 		fprintf(stderr, _("%s: options -s/--schema-only and -a/--data-only cannot be used with --section\n"),
+ 			progname);
+ 		exit(1);
+ 	}
+ 	
+ 	if (opts->dataOnly)
+ 		opts->dumpSections = DUMP_DATA;
+ 	else if (opts->schemaOnly)
+ 		opts->dumpSections = DUMP_PRE_DATA | DUMP_POST_DATA;
+ 	else if ( opts->dumpSections != DUMP_UNSECTIONED)
+ 	{
+ 		opts->dataOnly = opts->dumpSections == DUMP_DATA;
+ 		opts->schemaOnly = !(opts->dumpSections & DUMP_DATA);
+ 	}
+ 
  	/* Should get at most one of -d and -f, else user is confused */
  	if (opts->dbname)
  	{
***************
*** 432,437 **** usage(const char *progname)
--- 462,468 ----
  			 "                           created\n"));
  	printf(_("  --no-security-labels     do not restore security labels\n"));
  	printf(_("  --no-tablespaces         do not restore tablespace assignments\n"));
+ 	printf(_("  --section=SECTION        restore named section (pre-data, data or post-data)\n"));
  	printf(_("  --use-set-session-authorization\n"
  			 "                           use SET SESSION AUTHORIZATION commands instead of\n"
  	  "                           ALTER OWNER commands to set ownership\n"));
***************
*** 447,449 **** usage(const char *progname)
--- 478,503 ----
  	printf(_("\nIf no input file name is supplied, then standard input is used.\n\n"));
  	printf(_("Report bugs to <pgsql-bugs@postgresql.org>.\n"));
  }
+ 
+ static void 
+ set_section (int *dumpSections, const char *arg)
+ {
+ 	/* if this is the first, clear all the bits */
+ 	if (*dumpSections == DUMP_UNSECTIONED)
+ 		*dumpSections = 0; 
+ 	
+ 	if (strcmp(arg,"pre-data") == 0)
+ 		*dumpSections |= DUMP_PRE_DATA;
+ 	else if (strcmp(arg,"data") == 0)
+ 		*dumpSections |= DUMP_DATA;
+ 	else if (strcmp(arg,"post-data") == 0)
+ 		*dumpSections |= DUMP_POST_DATA;
+ 	else
+ 	{
+ 		fprintf(stderr, _("%s: unknown section name \"%s\")\n"),
+ 				progname, arg);
+ 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ 				progname);
+ 		exit(1);
+ 	}
+ }
#29Joshua Berkus
josh@agliodbs.com
In reply to: Andrew Dunstan (#28)
Re: pg_restore --no-post-data and --post-data-only

Here is a patch for that for pg_dump. The sections provided for are
pre-data, data and post-data, as discussed elsewhere. I still feel that
anything finer grained should be handled via pg_restore's --use-list
functionality. I'll provide a patch to do the same switch for pg_restore
shortly.

Adding to the commitfest.

Updated version with pg_restore included is attached.

Functionality review:

I have tested the backported version of this patch using a 500GB production database with over 200 objects and it worked as specified.

This functionality is extremely useful for the a variety of selective copying of databases, including creating shrunken test instances, ad-hoc parallel dump, differently indexed copies, and sanitizing copies of sensitive data, and even bringing the database up for usage while the indexes are still building.

Note that this feature has the odd effect that some constraints are loaded at the same time as the tables and some are loaded with the post-data. This is consistent with how text-mode pg_dump has always worked, but will seem odd to the user. This also raises the possibility of a future pg_dump/pg_restore optimization.

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com
San Francisco

#30Robert Haas
robertmhaas@gmail.com
In reply to: Joshua Berkus (#29)
Re: pg_restore --no-post-data and --post-data-only

On Tue, Nov 15, 2011 at 8:19 PM, Joshua Berkus <josh@agliodbs.com> wrote:

Here is a patch for that for pg_dump. The sections provided for are
pre-data, data and post-data, as discussed elsewhere. I still feel that
anything finer grained should be handled via pg_restore's --use-list
functionality. I'll provide a patch to do the same switch for pg_restore
shortly.

Adding to the commitfest.

Updated version with pg_restore included is attached.

Functionality review:

I have tested the backported version of this patch using a 500GB production database with over 200 objects and it worked as specified.

This functionality is extremely useful for the a variety of selective copying of databases, including creating shrunken test instances, ad-hoc parallel dump, differently indexed copies, and sanitizing copies of sensitive data, and even bringing the database up for usage while the indexes are still building.

Note that this feature has the odd effect that some constraints are loaded at the same time as the tables and some are loaded with the post-data.  This is consistent with how text-mode pg_dump has always worked, but will seem odd to the user.  This also raises the possibility of a future pg_dump/pg_restore optimization.

That does seem odd. Why do we do it that way?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#31Josh Berkus
josh@agliodbs.com
In reply to: Robert Haas (#30)
Re: pg_restore --no-post-data and --post-data-only

Note that this feature has the odd effect that some constraints are loaded at the same time as the tables and some are loaded with the post-data. This is consistent with how text-mode pg_dump has always worked, but will seem odd to the user. This also raises the possibility of a future pg_dump/pg_restore optimization.

That does seem odd. Why do we do it that way?

Beats me.

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com

#32Tom Lane
tgl@sss.pgh.pa.us
In reply to: Josh Berkus (#31)
Re: pg_restore --no-post-data and --post-data-only

Josh Berkus <josh@agliodbs.com> writes:

Note that this feature has the odd effect that some constraints are loaded at the same time as the tables and some are loaded with the post-data. This is consistent with how text-mode pg_dump has always worked, but will seem odd to the user. This also raises the possibility of a future pg_dump/pg_restore optimization.

That does seem odd. Why do we do it that way?

Beats me.

Performance, mostly --- we prefer to apply checks during the original
data load if possible, but for indexes and FK constraints it's faster to
apply them later. Also, we can separate constraints from the original
table declaration if it's necessary to break a reference circularity.
This isn't something that would be wise to whack around.

regards, tom lane

#33Andrew Dunstan
andrew@dunslane.net
In reply to: Tom Lane (#32)
Re: pg_restore --no-post-data and --post-data-only

On 12/07/2011 11:31 AM, Tom Lane wrote:

Josh Berkus<josh@agliodbs.com> writes:

Note that this feature has the odd effect that some constraints are loaded at the same time as the tables and some are loaded with the post-data. This is consistent with how text-mode pg_dump has always worked, but will seem odd to the user. This also raises the possibility of a future pg_dump/pg_restore optimization.

That does seem odd. Why do we do it that way?

Beats me.

Performance, mostly --- we prefer to apply checks during the original
data load if possible, but for indexes and FK constraints it's faster to
apply them later. Also, we can separate constraints from the original
table declaration if it's necessary to break a reference circularity.
This isn't something that would be wise to whack around.

Yeah, and if we did want to change it that should be a TODO and not hold
up this feature.

cheers

andrew

#34Joachim Wieland
joe@mcknight.de
In reply to: Andrew Dunstan (#28)
Re: pg_restore --no-post-data and --post-data-only

On Tue, Nov 15, 2011 at 6:14 PM, Andrew Dunstan <andrew@dunslane.net> wrote:

Updated version with pg_restore included is attached.

The patch applies with some fuzz by now but compiles without errors or warnings.

The feature just works, it is not adding a lot of new code, basically
it parses the given options and then skips over steps depending on the
selected section.

I verified the equivalence of -a and -s to the respective sections in
the different archive formats and no surprise here either, they were
equivalent except for the header (which has a timestamp).

If you ask pg_restore to restore a section out of an archive which
doesn't have this section, there is no error and the command just
succeeds. This is what I expected and I think it's the right thing to
do but maybe others think that
there should be a warning.

In pg_restore, pre-data cannot be run in parallel, it would only run
serially, data and post-data can run in parallel, though. This is also
what I had expected but it might be worth to add a note about this to
the documentation.

What I didn't like about the implementation was the two set_section()
functions, I'd prefer them to move to a file that is shared between
pg_dump and pg_restore and become one function...

Minor issues:

{"section", required_argument, NULL, 5} in pg_dump.c is not in the alphabetical
order of the options.

./pg_restore --section=foobar
pg_restore: unknown section name "foobar")

Note the trailing ')', it's coming from a _(...) confusion

Some of the lines in the patch have trailing spaces and in the
documentation part tabs and spaces are mixed.

int skip used as bool skip in dumpDumpableObject()

Joachim

#35Josh Berkus
josh@agliodbs.com
In reply to: Joachim Wieland (#34)
Re: pg_restore --no-post-data and --post-data-only

On 12/8/11 9:18 PM, Joachim Wieland wrote:

If you ask pg_restore to restore a section out of an archive which
doesn't have this section, there is no error and the command just
succeeds. This is what I expected and I think it's the right thing to
do but maybe others think that
there should be a warning.

Andrew and I discussed this previously. It's consistent with how we
treat other options in pg_restore. It may be that we should be
consistently treating all options differently, but I don't think that's
specific to this patch.

--
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com

#36Andrew Dunstan
andrew@dunslane.net
In reply to: Joachim Wieland (#34)
1 attachment(s)
Re: pg_restore --no-post-data and --post-data-only

On 12/08/2011 09:18 PM, Joachim Wieland wrote:

On Tue, Nov 15, 2011 at 6:14 PM, Andrew Dunstan<andrew@dunslane.net> wrote:

Updated version with pg_restore included is attached.

The patch applies with some fuzz by now but compiles without errors or warnings.

The feature just works, it is not adding a lot of new code, basically
it parses the given options and then skips over steps depending on the
selected section.

I verified the equivalence of -a and -s to the respective sections in
the different archive formats and no surprise here either, they were
equivalent except for the header (which has a timestamp).

If you ask pg_restore to restore a section out of an archive which
doesn't have this section, there is no error and the command just
succeeds. This is what I expected and I think it's the right thing to
do but maybe others think that
there should be a warning.

In pg_restore, pre-data cannot be run in parallel, it would only run
serially, data and post-data can run in parallel, though. This is also
what I had expected but it might be worth to add a note about this to
the documentation.

This is true now of parallel restore, and is by design (see debates from
the time.)

What I didn't like about the implementation was the two set_section()
functions, I'd prefer them to move to a file that is shared between
pg_dump and pg_restore and become one function...

Done

Minor issues:

{"section", required_argument, NULL, 5} in pg_dump.c is not in the alphabetical
order of the options.

./pg_restore --section=foobar
pg_restore: unknown section name "foobar")

Note the trailing ')', it's coming from a _(...) confusion

Some of the lines in the patch have trailing spaces and in the
documentation part tabs and spaces are mixed.

int skip used as bool skip in dumpDumpableObject()

Should all be fixed. Revised patch attached.

cheers

andrew

Attachments:

dump_sections3.patchtext/x-patch; name=dump_sections3.patchDownload
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index f6f33de..b16b429 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -116,9 +116,7 @@ PostgreSQL documentation
        </para>
 
        <para>
-        This option is only meaningful for the plain-text format.  For
-        the archive formats, you can specify the option when you
-        call <command>pg_restore</command>.
+        This option is equivalent to specifying <option>--section=data</>.
        </para>
       </listitem>
      </varlistentry>
@@ -404,10 +402,30 @@ PostgreSQL documentation
        <para>
         Dump only the object definitions (schema), not data.
        </para>
+       <para>
+        This option is equivalent to specifying 
+        <option>--section=pre-data --section=post-data</>.
+       </para>
       </listitem>
      </varlistentry>
 
      <varlistentry>
+       <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
+       <listitem>
+         <para>
+           Only dump the named section. The name can be one of <option>pre-data</>, <option>data</> 
+           and <option>post-data</>. 
+           This option can be specified more than once. The default is to dump all sections.
+         </para>
+         <para>
+           Post-data items consist of definitions of indexes, triggers, rules 
+           and constraints other than check constraints. 
+           Pre-data items consist of all other data definition items.
+         </para>
+       </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-S <replaceable class="parameter">username</replaceable></option></term>
       <term><option>--superuser=<replaceable class="parameter">username</replaceable></option></term>
       <listitem>
diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index be11d17..a28faf8 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -93,6 +93,9 @@
        <para>
         Restore only the data, not the schema (data definitions).
        </para>
+       <para>
+        This option is equivalent to specifying <option>--section=data</>.
+       </para>
       </listitem>
      </varlistentry>
 
@@ -359,6 +362,10 @@
         (Do not confuse this with the <option>--schema</> option, which
         uses the word <quote>schema</> in a different meaning.)
        </para>
+       <para>
+        This option is equivalent to specifying 
+        <option>--section=pre-data --section=post-data</>.
+       </para>
       </listitem>
      </varlistentry>
 
@@ -505,6 +512,22 @@
      </varlistentry>
 
      <varlistentry>
+       <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
+       <listitem>
+         <para>
+           Only restore the named section. The name can be one of <option>pre-data</>, <option>data</> 
+           and <option>post-data</>. 
+           This option can be specified more than once. The default is to restore all sections.
+         </para>
+         <para>
+           Post-data items consist of definitions of indexes, triggers, rules 
+           and constraints other than check constraints. 
+           Pre-data items consist of all other data definition items.
+         </para>
+       </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--use-set-session-authorization</option></term>
       <listitem>
        <para>
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c
index 5dab967..659ec06 100644
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -17,6 +17,7 @@
 #include <ctype.h>
 
 #include "dumputils.h"
+#include "pg_backup.h"
 
 #include "parser/keywords.h"
 
@@ -1262,3 +1263,32 @@ exit_horribly(const char *modulename, const char *fmt,...)
 
 	exit(1);
 }
+
+/*
+ * Set the bitmask in dumpSections according to the first argument.
+ * dumpSections is initialised as DUMP_UNSECTIONED by pg_dump and
+ * pg_restore so they can know if this has even been called.
+ */
+
+void
+set_section (const char *arg, int *dumpSections)
+{
+	/* if this is the first, clear all the bits */
+	if (*dumpSections == DUMP_UNSECTIONED)
+		*dumpSections = 0;
+
+	if (strcmp(arg,"pre-data") == 0)
+		*dumpSections |= DUMP_PRE_DATA;
+	else if (strcmp(arg,"data") == 0)
+		*dumpSections |= DUMP_DATA;
+	else if (strcmp(arg,"post-data") == 0)
+		*dumpSections |= DUMP_POST_DATA;
+	else
+	{
+		fprintf(stderr, _("%s: unknown section name \"%s\")\n"),
+				progname, arg);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+}
diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h
index c857f3a..f80dd27 100644
--- a/src/bin/pg_dump/dumputils.h
+++ b/src/bin/pg_dump/dumputils.h
@@ -58,5 +58,6 @@ extern void vwrite_msg(const char *modulename, const char *fmt, va_list ap)
 				__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 0)));
 extern void exit_horribly(const char *modulename, const char *fmt,...)
 				__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
+extern void set_section (const char *arg, int *dumpSections);
 
 #endif   /* DUMPUTILS_H */
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 8168cff..8926488 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -69,6 +69,14 @@ typedef enum _teSection
 	SECTION_POST_DATA			/* stuff to be processed after data */
 } teSection;
 
+typedef enum
+{
+	DUMP_PRE_DATA = 0x01,
+	DUMP_DATA = 0x02,
+	DUMP_POST_DATA = 0x04,
+	DUMP_UNSECTIONED = 0xff
+} DumpSections;
+
 /*
  *	We may want to have some more user-readable data, but in the mean
  *	time this gives us some abstraction and type checking.
@@ -111,6 +119,7 @@ typedef struct _restoreOptions
 	int			dropSchema;
 	char	   *filename;
 	int			schemaOnly;
+	int         dumpSections;
 	int			verbose;
 	int			aclsSkip;
 	int			tocSummary;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 6612309..7d895c4 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -665,6 +665,7 @@ NewRestoreOptions(void)
 	/* set any fields that shouldn't default to zeroes */
 	opts->format = archUnknown;
 	opts->promptPassword = TRI_DEFAULT;
+	opts->dumpSections = DUMP_UNSECTIONED;
 
 	return opts;
 }
@@ -2120,6 +2121,7 @@ ReadToc(ArchiveHandle *AH)
 	int			depIdx;
 	int			depSize;
 	TocEntry   *te;
+	bool        in_post_data = false;
 
 	AH->tocCount = ReadInt(AH);
 	AH->maxDumpId = 0;
@@ -2185,6 +2187,12 @@ ReadToc(ArchiveHandle *AH)
 				te->section = SECTION_PRE_DATA;
 		}
 
+		/* will stay true even for SECTION_NONE items */
+		if (te->section == SECTION_POST_DATA)
+			in_post_data = true;
+
+		te->inPostData = in_post_data;
+
 		te->defn = ReadStr(AH);
 		te->dropStmt = ReadStr(AH);
 
@@ -2334,6 +2342,17 @@ _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include_acls)
 	if (!ropt->createDB && strcmp(te->desc, "DATABASE") == 0)
 		return 0;
 
+	/* skip (all but) post data section as required */
+	/* table data is filtered if necessary lower down */
+	if (ropt->dumpSections != DUMP_UNSECTIONED)
+	{
+		if (!(ropt->dumpSections & DUMP_POST_DATA) && te->inPostData)
+			return 0;
+		if (!(ropt->dumpSections & DUMP_PRE_DATA) && ! te->inPostData && strcmp(te->desc, "TABLE DATA") != 0)
+			return 0;
+	}
+
+
 	/* Check options for selective dump/restore */
 	if (ropt->schemaNames)
 	{
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 07a4b6e..7a4fd36 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -287,6 +287,9 @@ typedef struct _tocEntry
 	void	   *dataDumperArg;	/* Arg for above routine */
 	void	   *formatData;		/* TOC Entry data specific to file format */
 
+	/* in post data? not quite the same as section, might be SECTION_NONE */
+	bool        inPostData;
+
 	/* working state (needed only for parallel restore) */
 	struct _tocEntry *par_prev; /* list links for pending/ready items; */
 	struct _tocEntry *par_next; /* these are NULL if not in either list */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index afeae6f..2515970 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -91,6 +91,7 @@ PGconn	   *g_conn;				/* the database connection */
 /* various user-settable parameters */
 bool		schemaOnly;
 bool		dataOnly;
+int         dumpSections; /* bitmask of chosen sections */
 bool		aclsSkip;
 const char *lockWaitTimeout;
 
@@ -248,7 +249,6 @@ static void do_sql_command(PGconn *conn, const char *query);
 static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
 				 ExecStatusType expected);
 
-
 int
 main(int argc, char **argv)
 {
@@ -329,6 +329,7 @@ main(int argc, char **argv)
 		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
 		{"quote-all-identifiers", no_argument, &quote_all_identifiers, 1},
 		{"role", required_argument, NULL, 3},
+		{"section", required_argument, NULL, 5},
 		{"serializable-deferrable", no_argument, &serializable_deferrable, 1},
 		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
 		{"no-security-labels", no_argument, &no_security_labels, 1},
@@ -346,6 +347,7 @@ main(int argc, char **argv)
 	strcpy(g_opaque_type, "opaque");
 
 	dataOnly = schemaOnly = false;
+	dumpSections = DUMP_UNSECTIONED;
 	lockWaitTimeout = NULL;
 
 	progname = get_progname(argv[0]);
@@ -487,6 +489,10 @@ main(int argc, char **argv)
 				use_role = optarg;
 				break;
 
+			case 5:				/* section */
+				set_section(optarg, &dumpSections);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -517,6 +523,22 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	if ((dataOnly || schemaOnly) && dumpSections != DUMP_UNSECTIONED)
+	{
+		write_msg(NULL, "options -s/--schema-only and -a/--data-only cannot be used with --section\n");
+		exit(1);
+	}
+
+	if (dataOnly)
+		dumpSections = DUMP_DATA;
+	else if (schemaOnly)
+		dumpSections = DUMP_PRE_DATA | DUMP_POST_DATA;
+	else if ( dumpSections != DUMP_UNSECTIONED)
+	{
+		dataOnly = dumpSections == DUMP_DATA;
+		schemaOnly = !(dumpSections & DUMP_DATA);
+	}
+
 	if (dataOnly && outputClean)
 	{
 		write_msg(NULL, "options -c/--clean and -a/--data-only cannot be used together\n");
@@ -859,6 +881,7 @@ help(const char *progname)
 	printf(_("  --no-tablespaces            do not dump tablespace assignments\n"));
 	printf(_("  --no-unlogged-table-data    do not dump unlogged table data\n"));
 	printf(_("  --quote-all-identifiers     quote all identifiers, even if not key words\n"));
+	printf(_("  --section=SECTION           dump named section (pre-data, data or post-data)\n"));
 	printf(_("  --serializable-deferrable   wait until the dump can run without anomalies\n"));
 	printf(_("  --use-set-session-authorization\n"
 			 "                              use SET SESSION AUTHORIZATION commands instead of\n"
@@ -7068,6 +7091,28 @@ collectComments(Archive *fout, CommentItem **items)
 static void
 dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 {
+
+	bool skip = false;
+
+	switch (dobj->objType)
+	{
+		case DO_INDEX:
+		case DO_TRIGGER:
+		case DO_CONSTRAINT:
+		case DO_FK_CONSTRAINT:
+		case DO_RULE:
+			skip = !(dumpSections & DUMP_POST_DATA);
+			break;
+		case DO_TABLE_DATA:
+			skip = !(dumpSections & DUMP_DATA);
+			break;
+		default:
+			skip = !(dumpSections & DUMP_PRE_DATA);
+	}
+
+	if (skip)
+		return;
+
 	switch (dobj->objType)
 	{
 		case DO_NAMESPACE:
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index 8d6edacd..6ff1ab8 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -118,6 +118,7 @@ main(int argc, char **argv)
 		{"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1},
 		{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
 		{"role", required_argument, NULL, 2},
+		{"section", required_argument, NULL, 3},
 		{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
 		{"no-security-labels", no_argument, &no_security_labels, 1},
 
@@ -272,6 +273,10 @@ main(int argc, char **argv)
 				opts->use_role = optarg;
 				break;
 
+			case 3:				/* section */
+				set_section(optarg, &(opts->dumpSections));
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -294,6 +299,30 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	if (opts->dataOnly && opts->schemaOnly)
+	{
+		fprintf(stderr, _("%s: options -s/--schema-only and -a/--data-only cannot be used together\n"),
+			progname);
+		exit(1);
+	}
+
+	if ((opts->dataOnly || opts->schemaOnly) && (opts->dumpSections != DUMP_UNSECTIONED))
+	{
+		fprintf(stderr, _("%s: options -s/--schema-only and -a/--data-only cannot be used with --section\n"),
+			progname);
+		exit(1);
+	}
+
+	if (opts->dataOnly)
+		opts->dumpSections = DUMP_DATA;
+	else if (opts->schemaOnly)
+		opts->dumpSections = DUMP_PRE_DATA | DUMP_POST_DATA;
+	else if ( opts->dumpSections != DUMP_UNSECTIONED)
+	{
+		opts->dataOnly = opts->dumpSections == DUMP_DATA;
+		opts->schemaOnly = !(opts->dumpSections & DUMP_DATA);
+	}
+
 	/* Should get at most one of -d and -f, else user is confused */
 	if (opts->dbname)
 	{
@@ -434,6 +463,7 @@ usage(const char *progname)
 			 "                           created\n"));
 	printf(_("  --no-security-labels     do not restore security labels\n"));
 	printf(_("  --no-tablespaces         do not restore tablespace assignments\n"));
+	printf(_("  --section=SECTION        restore named section (pre-data, data or post-data)\n"));
 	printf(_("  --use-set-session-authorization\n"
 			 "                           use SET SESSION AUTHORIZATION commands instead of\n"
 	  "                           ALTER OWNER commands to set ownership\n"));
#37Greg Smith
greg@2ndQuadrant.com
In reply to: Andrew Dunstan (#36)
Re: pg_restore --no-post-data and --post-data-only

On 12/12/2011 04:35 PM, Andrew Dunstan wrote:

Should all be fixed. Revised patch attached.

There were two successful test results here and only minor things noted
to fix, which are all cleaned up now. This seems ready for a committer
now; I'm just now sure if you want to do it yourself or have someone
else take a last look over it instead.

--
Greg Smith 2ndQuadrant US greg@2ndQuadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.us

#38Andrew Dunstan
andrew@dunslane.net
In reply to: Greg Smith (#37)
Re: pg_restore --no-post-data and --post-data-only

On 12/16/2011 02:43 PM, Greg Smith wrote:

On 12/12/2011 04:35 PM, Andrew Dunstan wrote:

Should all be fixed. Revised patch attached.

There were two successful test results here and only minor things
noted to fix, which are all cleaned up now. This seems ready for a
committer now; I'm just now sure if you want to do it yourself or have
someone else take a last look over it instead.

I'll do it myself if nobody else wants to comment.

cheers

andrew