Teaching pg_dump to use NOT VALID constraints

Started by Simon Riggsabout 11 years ago7 messages
#1Simon Riggs
simon@2ndQuadrant.com
1 attachment(s)

Magnus and I discussed the need for pg_dump to offer the use of NOT
VALID constraints.
Here's the patch.

pg_dump --no-revalidaton

will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

Attachments:

pg_dump_revalidation.v1.patchapplication/octet-stream; name=pg_dump_revalidation.v1.patchDownload
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index c92c6ee..ff28fcd 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -818,6 +818,17 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>--no-revalidation</option></term>
+      <listitem>
+       <para>
+        Use the NOT VALID option when recreating table constraints that are
+        already known to be valid, so that initial validation checks are
+        skipped.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--quote-all-identifiers</></term>
       <listitem>
        <para>
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index c2ebcd4..9c49692 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -142,6 +142,7 @@ typedef struct _restoreOptions
 	int			suppressDumpWarnings;	/* Suppress output of WARNING entries
 										 * to stderr */
 	bool		single_txn;
+	bool		no_revalidation;
 
 	bool	   *idWanted;		/* array showing which dump IDs to emit */
 	int			enable_row_security;
@@ -178,6 +179,7 @@ typedef struct _dumpOptions
 	int			outputNoTablespaces;
 	int			use_setsessauth;
 	int			enable_row_security;
+	int			no_revalidation;
 
 	/* default, if no "inclusion" switches appear, is to dump everything */
 	bool		include_everything;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 1e8f089..81a19b1 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -333,6 +333,7 @@ main(int argc, char **argv)
 		{"no-security-labels", no_argument, &dopt->no_security_labels, 1},
 		{"no-synchronized-snapshots", no_argument, &dopt->no_synchronized_snapshots, 1},
 		{"no-unlogged-table-data", no_argument, &dopt->no_unlogged_table_data, 1},
+		{"no-revalidation", no_argument, &dopt->no_revalidation, 1},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -5613,6 +5614,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 				constrinfo[j].condeferrable = *(PQgetvalue(res, j, i_condeferrable)) == 't';
 				constrinfo[j].condeferred = *(PQgetvalue(res, j, i_condeferred)) == 't';
 				constrinfo[j].conislocal = true;
+				constrinfo[j].conisvalid = true;
 				constrinfo[j].separate = true;
 
 				indxinfo[j].indexconstraint = constrinfo[j].dobj.dumpId;
@@ -5683,9 +5685,20 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
 		selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
 
 		resetPQExpBuffer(query);
-		appendPQExpBuffer(query,
+		if (fout->remoteVersion >= 90100)
+			appendPQExpBuffer(query,
 						  "SELECT tableoid, oid, conname, confrelid, "
-						  "pg_catalog.pg_get_constraintdef(oid) AS condef "
+						  "pg_catalog.pg_get_constraintdef(oid) AS condef, "
+						  "convalidated "
+						  "FROM pg_catalog.pg_constraint "
+						  "WHERE conrelid = '%u'::pg_catalog.oid "
+						  "AND contype = 'f'",
+						  tbinfo->dobj.catId.oid);
+		else
+			appendPQExpBuffer(query,
+						  "SELECT tableoid, oid, conname, confrelid, "
+						  "pg_catalog.pg_get_constraintdef(oid) AS condef, "
+						  "true as convalidated "
 						  "FROM pg_catalog.pg_constraint "
 						  "WHERE conrelid = '%u'::pg_catalog.oid "
 						  "AND contype = 'f'",
@@ -5704,6 +5717,8 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
 
 		for (j = 0; j < ntups; j++)
 		{
+			bool		validated = PQgetvalue(res, j, 5)[0] == 't';
+
 			constrinfo[j].dobj.objType = DO_FK_CONSTRAINT;
 			constrinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_contableoid));
 			constrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_conoid));
@@ -5719,6 +5734,7 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
 			constrinfo[j].condeferrable = false;
 			constrinfo[j].condeferred = false;
 			constrinfo[j].conislocal = true;
+			constrinfo[j].conisvalid = validated;
 			constrinfo[j].separate = true;
 		}
 
@@ -5817,7 +5833,7 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
 		constrinfo[i].condeferrable = false;
 		constrinfo[i].condeferred = false;
 		constrinfo[i].conislocal = true;
-
+		constrinfo[i].conisvalid = validated;
 		constrinfo[i].separate = !validated;
 
 		/*
@@ -7095,6 +7111,7 @@ getTableAttrs(Archive *fout, DumpOptions *dopt, TableInfo *tblinfo, int numTable
 				 * that potentially-violating existing data is loaded before
 				 * the constraint.
 				 */
+				constrs[j].conisvalid = validated;
 				constrs[j].separate = !validated;
 
 				constrs[j].dobj.dump = tbinfo->dobj.dump;
@@ -14350,15 +14367,12 @@ dumpConstraint(Archive *fout, DumpOptions *dopt, ConstraintInfo *coninfo)
 	}
 	else if (coninfo->contype == 'f')
 	{
-		/*
-		 * XXX Potentially wrap in a 'SET CONSTRAINTS OFF' block so that the
-		 * current table data is not processed
-		 */
 		appendPQExpBuffer(q, "ALTER TABLE ONLY %s\n",
 						  fmtId(tbinfo->dobj.name));
-		appendPQExpBuffer(q, "    ADD CONSTRAINT %s %s;\n",
+		appendPQExpBuffer(q, "    ADD CONSTRAINT %s %s%s\n",
 						  fmtId(coninfo->dobj.name),
-						  coninfo->condef);
+						  coninfo->condef,
+						  (coninfo->conisvalid && dopt->no_revalidation ? " NOT VALID;" : ";"));
 
 		/*
 		 * DROP must be fully qualified in case same name appears in
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index a7eb2fd..da3568a 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -353,6 +353,7 @@ typedef struct _constraintInfo
 	bool		condeferrable;	/* TRUE if constraint is DEFERRABLE */
 	bool		condeferred;	/* TRUE if constraint is INITIALLY DEFERRED */
 	bool		conislocal;		/* TRUE if constraint has local definition */
+	bool		conisvalid;		/* TRUE if constraint is valid */
 	bool		separate;		/* TRUE if must dump as separate item */
 } ConstraintInfo;
 
#2Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Simon Riggs (#1)
Re: Teaching pg_dump to use NOT VALID constraints

Simon Riggs wrote:

Magnus and I discussed the need for pg_dump to offer the use of NOT
VALID constraints.
Here's the patch.

pg_dump --no-revalidaton

will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.

Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?

ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Simon Riggs
simon@2ndQuadrant.com
In reply to: Alvaro Herrera (#2)
Re: Teaching pg_dump to use NOT VALID constraints

On 10 November 2014 17:33, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:

pg_dump --no-revalidaton

will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.

Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?

Correct. CHECK constraints are added onto main table so they validate at load.

ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.

Yes, may be an even better idea. We'd still want the --no-revalidation
option, AFAICS.

FKs are already "at the end". Perhaps we should add another
"validation" section?

I like the idea, just not sure how long it would take.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Jim Nasby
Jim.Nasby@BlueTreble.com
In reply to: Simon Riggs (#3)
Re: Teaching pg_dump to use NOT VALID constraints

On 11/10/14, 12:00 PM, Simon Riggs wrote:

On 10 November 2014 17:33, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:

pg_dump --no-revalidaton

will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.

Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?

Correct. CHECK constraints are added onto main table so they validate at load.

ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.

Yes, may be an even better idea. We'd still want the --no-revalidation
option, AFAICS.

FKs are already "at the end". Perhaps we should add another
"validation" section?

I like the idea, just not sure how long it would take.

Isn't the real use-case here that if constraints were valid when you dumped then we shouldn't have to *any* re-validate when we load? (Though, we'd have to be careful of that with CHECK because that can call user code...)
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#5Simon Riggs
simon@2ndQuadrant.com
In reply to: Jim Nasby (#4)
Re: Teaching pg_dump to use NOT VALID constraints

On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:

Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)

Yes, that is the use case this patch would improve considerably.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6Peter Eisentraut
peter_e@gmx.net
In reply to: Simon Riggs (#5)
Re: Teaching pg_dump to use NOT VALID constraints

On 11/13/14 5:07 AM, Simon Riggs wrote:

On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:

Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)

Yes, that is the use case this patch would improve considerably.

But your patch doesn't really address that. It just leaves the
constraints as invalid, and someone will have to revalidate them later
(how?). What Jim was describing was a mode that creates the constraints
as valid but doesn't actually validate them. I can see both sides of
that kind of feature.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#7Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Peter Eisentraut (#6)
Re: Teaching pg_dump to use NOT VALID constraints

Peter Eisentraut wrote:

On 11/13/14 5:07 AM, Simon Riggs wrote:

On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:

Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)

Yes, that is the use case this patch would improve considerably.

But your patch doesn't really address that. It just leaves the
constraints as invalid, and someone will have to revalidate them later
(how?). What Jim was describing was a mode that creates the constraints
as valid but doesn't actually validate them. I can see both sides of
that kind of feature.

This might lead to users introducing invalid data by way of declaring
constants as valid but not checked by the system; if they turn out to be
invalid, the final state is a mess. I would only buy such a feature if
we had some way to pass down the knowledge of the constraint being valid
in the original system through some other means; say emit a CRC of the
copy data in the pg_dump output that can be verified while loading, and
only allow unvalidated constraints to be marked VALID if the sum
matches.

--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers