Teaching pg_dump to use NOT VALID constraints
Magnus and I discussed the need for pg_dump to offer the use of NOT
VALID constraints.
Here's the patch.
pg_dump --no-revalidaton
will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
Attachments:
pg_dump_revalidation.v1.patchapplication/octet-stream; name=pg_dump_revalidation.v1.patchDownload
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index c92c6ee..ff28fcd 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -818,6 +818,17 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
+ <term><option>--no-revalidation</option></term>
+ <listitem>
+ <para>
+ Use the NOT VALID option when recreating table constraints that are
+ already known to be valid, so that initial validation checks are
+ skipped.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>--quote-all-identifiers</></term>
<listitem>
<para>
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index c2ebcd4..9c49692 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -142,6 +142,7 @@ typedef struct _restoreOptions
int suppressDumpWarnings; /* Suppress output of WARNING entries
* to stderr */
bool single_txn;
+ bool no_revalidation;
bool *idWanted; /* array showing which dump IDs to emit */
int enable_row_security;
@@ -178,6 +179,7 @@ typedef struct _dumpOptions
int outputNoTablespaces;
int use_setsessauth;
int enable_row_security;
+ int no_revalidation;
/* default, if no "inclusion" switches appear, is to dump everything */
bool include_everything;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 1e8f089..81a19b1 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -333,6 +333,7 @@ main(int argc, char **argv)
{"no-security-labels", no_argument, &dopt->no_security_labels, 1},
{"no-synchronized-snapshots", no_argument, &dopt->no_synchronized_snapshots, 1},
{"no-unlogged-table-data", no_argument, &dopt->no_unlogged_table_data, 1},
+ {"no-revalidation", no_argument, &dopt->no_revalidation, 1},
{NULL, 0, NULL, 0}
};
@@ -5613,6 +5614,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
constrinfo[j].condeferrable = *(PQgetvalue(res, j, i_condeferrable)) == 't';
constrinfo[j].condeferred = *(PQgetvalue(res, j, i_condeferred)) == 't';
constrinfo[j].conislocal = true;
+ constrinfo[j].conisvalid = true;
constrinfo[j].separate = true;
indxinfo[j].indexconstraint = constrinfo[j].dobj.dumpId;
@@ -5683,9 +5685,20 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
resetPQExpBuffer(query);
- appendPQExpBuffer(query,
+ if (fout->remoteVersion >= 90100)
+ appendPQExpBuffer(query,
"SELECT tableoid, oid, conname, confrelid, "
- "pg_catalog.pg_get_constraintdef(oid) AS condef "
+ "pg_catalog.pg_get_constraintdef(oid) AS condef, "
+ "convalidated "
+ "FROM pg_catalog.pg_constraint "
+ "WHERE conrelid = '%u'::pg_catalog.oid "
+ "AND contype = 'f'",
+ tbinfo->dobj.catId.oid);
+ else
+ appendPQExpBuffer(query,
+ "SELECT tableoid, oid, conname, confrelid, "
+ "pg_catalog.pg_get_constraintdef(oid) AS condef, "
+ "true as convalidated "
"FROM pg_catalog.pg_constraint "
"WHERE conrelid = '%u'::pg_catalog.oid "
"AND contype = 'f'",
@@ -5704,6 +5717,8 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
for (j = 0; j < ntups; j++)
{
+ bool validated = PQgetvalue(res, j, 5)[0] == 't';
+
constrinfo[j].dobj.objType = DO_FK_CONSTRAINT;
constrinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_contableoid));
constrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_conoid));
@@ -5719,6 +5734,7 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
constrinfo[j].condeferrable = false;
constrinfo[j].condeferred = false;
constrinfo[j].conislocal = true;
+ constrinfo[j].conisvalid = validated;
constrinfo[j].separate = true;
}
@@ -5817,7 +5833,7 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
constrinfo[i].condeferrable = false;
constrinfo[i].condeferred = false;
constrinfo[i].conislocal = true;
-
+ constrinfo[i].conisvalid = validated;
constrinfo[i].separate = !validated;
/*
@@ -7095,6 +7111,7 @@ getTableAttrs(Archive *fout, DumpOptions *dopt, TableInfo *tblinfo, int numTable
* that potentially-violating existing data is loaded before
* the constraint.
*/
+ constrs[j].conisvalid = validated;
constrs[j].separate = !validated;
constrs[j].dobj.dump = tbinfo->dobj.dump;
@@ -14350,15 +14367,12 @@ dumpConstraint(Archive *fout, DumpOptions *dopt, ConstraintInfo *coninfo)
}
else if (coninfo->contype == 'f')
{
- /*
- * XXX Potentially wrap in a 'SET CONSTRAINTS OFF' block so that the
- * current table data is not processed
- */
appendPQExpBuffer(q, "ALTER TABLE ONLY %s\n",
fmtId(tbinfo->dobj.name));
- appendPQExpBuffer(q, " ADD CONSTRAINT %s %s;\n",
+ appendPQExpBuffer(q, " ADD CONSTRAINT %s %s%s\n",
fmtId(coninfo->dobj.name),
- coninfo->condef);
+ coninfo->condef,
+ (coninfo->conisvalid && dopt->no_revalidation ? " NOT VALID;" : ";"));
/*
* DROP must be fully qualified in case same name appears in
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index a7eb2fd..da3568a 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -353,6 +353,7 @@ typedef struct _constraintInfo
bool condeferrable; /* TRUE if constraint is DEFERRABLE */
bool condeferred; /* TRUE if constraint is INITIALLY DEFERRED */
bool conislocal; /* TRUE if constraint has local definition */
+ bool conisvalid; /* TRUE if constraint is valid */
bool separate; /* TRUE if must dump as separate item */
} ConstraintInfo;
Simon Riggs wrote:
Magnus and I discussed the need for pg_dump to offer the use of NOT
VALID constraints.
Here's the patch.pg_dump --no-revalidaton
will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.
Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?
ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 10 November 2014 17:33, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:
pg_dump --no-revalidaton
will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?
Correct. CHECK constraints are added onto main table so they validate at load.
ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.
Yes, may be an even better idea. We'd still want the --no-revalidation
option, AFAICS.
FKs are already "at the end". Perhaps we should add another
"validation" section?
I like the idea, just not sure how long it would take.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11/10/14, 12:00 PM, Simon Riggs wrote:
On 10 November 2014 17:33, Alvaro Herrera <alvherre@2ndquadrant.com> wrote:
pg_dump --no-revalidaton
will add "NOT VALID" onto the recreation SQL for any FKs, but only for
ones that were already known to be valid.Well. Constraints that haven't been validated already have a NOT VALID
emitted by ruleutils.c, yes? So what this patch does is add such a
clause for all *other* constraints. Right? In other words what it aims
to do is speed up loading of data by skipping the validation step on
restore. Is that right?Correct. CHECK constraints are added onto main table so they validate at load.
ISTM we could have the default pg_dump behavior emit NOT VALID
constraints, and add VALIDATE CONSTRAINT commands at the end; that way
the database is usable sooner but the constraints end up marked as
validated by the time the dump is finished.Yes, may be an even better idea. We'd still want the --no-revalidation
option, AFAICS.FKs are already "at the end". Perhaps we should add another
"validation" section?I like the idea, just not sure how long it would take.
Isn't the real use-case here that if constraints were valid when you dumped then we shouldn't have to *any* re-validate when we load? (Though, we'd have to be careful of that with CHECK because that can call user code...)
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:
Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)
Yes, that is the use case this patch would improve considerably.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11/13/14 5:07 AM, Simon Riggs wrote:
On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:
Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)Yes, that is the use case this patch would improve considerably.
But your patch doesn't really address that. It just leaves the
constraints as invalid, and someone will have to revalidate them later
(how?). What Jim was describing was a mode that creates the constraints
as valid but doesn't actually validate them. I can see both sides of
that kind of feature.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Peter Eisentraut wrote:
On 11/13/14 5:07 AM, Simon Riggs wrote:
On 13 November 2014 00:20, Jim Nasby <Jim.Nasby@bluetreble.com> wrote:
Isn't the real use-case here that if constraints were valid when you dumped
then we shouldn't have to *any* re-validate when we load? (Though, we'd have
to be careful of that with CHECK because that can call user code...)Yes, that is the use case this patch would improve considerably.
But your patch doesn't really address that. It just leaves the
constraints as invalid, and someone will have to revalidate them later
(how?). What Jim was describing was a mode that creates the constraints
as valid but doesn't actually validate them. I can see both sides of
that kind of feature.
This might lead to users introducing invalid data by way of declaring
constants as valid but not checked by the system; if they turn out to be
invalid, the final state is a mess. I would only buy such a feature if
we had some way to pass down the knowledge of the constraint being valid
in the original system through some other means; say emit a CRC of the
copy data in the pg_dump output that can be verified while loading, and
only allow unvalidated constraints to be marked VALID if the sum
matches.
--
�lvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers