[PATCH] pg_dump: Do not dump statistics for excluded tables
Hi hackers,
I've attached a patch against master that addresses a small bug in pg_dump.
Previously, pg_dump would include CREATE STATISTICS statements for
tables that were excluded from the dump, causing reload to fail if any
excluded tables had extended statistics.
The patch skips the creation of the StatsExtInfo if the associated
table does not have the DUMP_COMPONENT_DEFINITION flag set. This is
similar to how getPublicationTables behaves if a table is excluded.
I've covered this with a regression test by altering one of the CREATE
STATISTICS examples to work with the existing 'exclude_test_table'
run. Without the fix, that causes the test to fail with:
# Failed test 'exclude_test_table: should not dump CREATE STATISTICS
extended_stats_no_options'
# at t/002_pg_dump.pl line 4934.
Regards,
Rian
Attachments:
v1-0001-pg_dump-Do-not-dump-statistics-for-excluded-tables.patchtext/plain; charset=UTF-8; name=v1-0001-pg_dump-Do-not-dump-statistics-for-excluded-tables.patchDownload
From 0fe06338728981fa727e3e6b99247741deda75fb Mon Sep 17 00:00:00 2001
From: Rian McGuire <rian.mcguire@buildkite.com>
Date: Wed, 27 Dec 2023 13:09:31 +1100
Subject: [PATCH] pg_dump: Do not dump statistics for excluded tables
Previously, pg_dump would include CREATE STATISTICS statements for
tables that were excluded from the dump, causing reload to fail.
---
src/bin/pg_dump/pg_dump.c | 43 +++++++++++++++++++++++---------
src/bin/pg_dump/t/002_pg_dump.pl | 5 ++--
2 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8c0b5486b9..c67ed416e9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -7288,11 +7288,13 @@ getExtendedStatistics(Archive *fout)
int ntups;
int i_tableoid;
int i_oid;
+ int i_stxrelid;
int i_stxname;
int i_stxnamespace;
int i_stxowner;
int i_stattarget;
- int i;
+ int i,
+ j;
/* Extended statistics were new in v10 */
if (fout->remoteVersion < 100000)
@@ -7301,11 +7303,11 @@ getExtendedStatistics(Archive *fout)
query = createPQExpBuffer();
if (fout->remoteVersion < 130000)
- appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
+ appendPQExpBufferStr(query, "SELECT tableoid, oid, stxrelid, stxname, "
"stxnamespace, stxowner, (-1) AS stxstattarget "
"FROM pg_catalog.pg_statistic_ext");
else
- appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
+ appendPQExpBufferStr(query, "SELECT tableoid, oid, stxrelid, stxname, "
"stxnamespace, stxowner, stxstattarget "
"FROM pg_catalog.pg_statistic_ext");
@@ -7315,27 +7317,44 @@ getExtendedStatistics(Archive *fout)
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
+ i_stxrelid = PQfnumber(res, "stxrelid");
i_stxname = PQfnumber(res, "stxname");
i_stxnamespace = PQfnumber(res, "stxnamespace");
i_stxowner = PQfnumber(res, "stxowner");
i_stattarget = PQfnumber(res, "stxstattarget");
+ /* this allocation may be more than we need */
statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
+ j = 0;
for (i = 0; i < ntups; i++)
{
- statsextinfo[i].dobj.objType = DO_STATSEXT;
- statsextinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_tableoid));
- statsextinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
- AssignDumpId(&statsextinfo[i].dobj);
- statsextinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_stxname));
- statsextinfo[i].dobj.namespace =
+ Oid stxrelid;
+ TableInfo *tbinfo;
+
+ /*
+ * Only dump extended statistics if we're going to dump the definition
+ * of the table that they're associated with.
+ */
+ stxrelid = atoi(PQgetvalue(res, i, i_stxrelid));
+ tbinfo = findTableByOid(stxrelid);
+ if (tbinfo == NULL || !(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
+ continue;
+
+ statsextinfo[j].dobj.objType = DO_STATSEXT;
+ statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_tableoid));
+ statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
+ AssignDumpId(&statsextinfo[j].dobj);
+ statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, i, i_stxname));
+ statsextinfo[j].dobj.namespace =
findNamespace(atooid(PQgetvalue(res, i, i_stxnamespace)));
- statsextinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_stxowner));
- statsextinfo[i].stattarget = atoi(PQgetvalue(res, i, i_stattarget));
+ statsextinfo[j].rolname = getRoleName(PQgetvalue(res, i, i_stxowner));
+ statsextinfo[j].stattarget = atoi(PQgetvalue(res, i, i_stattarget));
/* Decide whether we want to dump it */
- selectDumpableObject(&(statsextinfo[i].dobj), fout);
+ selectDumpableObject(&(statsextinfo[j].dobj), fout);
+
+ j++;
}
PQclear(res);
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index eb3ec534b4..a671603cd2 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -3742,14 +3742,15 @@ my %tests = (
'CREATE STATISTICS extended_stats_no_options' => {
create_order => 97,
create_sql => 'CREATE STATISTICS dump_test.test_ext_stats_no_options
- ON col1, col2 FROM dump_test.test_fifth_table',
+ ON col1, col2 FROM dump_test.test_table',
regexp => qr/^
- \QCREATE STATISTICS dump_test.test_ext_stats_no_options ON col1, col2 FROM dump_test.test_fifth_table;\E
+ \QCREATE STATISTICS dump_test.test_ext_stats_no_options ON col1, col2 FROM dump_test.test_table;\E
/xms,
like =>
{ %full_runs, %dump_test_schema_runs, section_post_data => 1, },
unlike => {
exclude_dump_test_schema => 1,
+ exclude_test_table => 1,
only_dump_measurement => 1,
},
},
--
2.43.0
Rian McGuire <rian.mcguire@buildkite.com> writes:
I've attached a patch against master that addresses a small bug in pg_dump.
Previously, pg_dump would include CREATE STATISTICS statements for
tables that were excluded from the dump, causing reload to fail if any
excluded tables had extended statistics.
I agree that's a bug ...
The patch skips the creation of the StatsExtInfo if the associated
table does not have the DUMP_COMPONENT_DEFINITION flag set. This is
similar to how getPublicationTables behaves if a table is excluded.
... but I don't like the details of this patch (and I'm not too
thrilled with the implementation of getPublicationTables, either).
The style in pg_dump is to put such decisions into separate
policy-setting subroutines. Also, skipping creation of the
DumpableObject altogether is the wrong thing because it'd prevent
pg_dump from tracing or reasoning about dependencies involving the
stats object, which can be relevant even if the object itself isn't
dumped --- this is why all the other data-collection subroutines
operate as they do. getPublicationTables can probably get away
with its low-rent approach given that publication membership isn't
represented by pg_depend entries, but it's far from clear that it'll
never be an issue for stats.
So I think it needs to be more like the attached.
(I did use your test case verbatim.)
regards, tom lane
Attachments:
v2-0001-pg_dump-Do-not-dump-statistics-for-excluded-tables.patchtext/x-diff; charset=us-ascii; name*0=v2-0001-pg_dump-Do-not-dump-statistics-for-excluded-tables.; name*1=patchDownload
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8c0b5486b9..050a831226 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2046,6 +2046,26 @@ selectDumpablePublicationObject(DumpableObject *dobj, Archive *fout)
DUMP_COMPONENT_ALL : DUMP_COMPONENT_NONE;
}
+/*
+ * selectDumpableStatisticsObject: policy-setting subroutine
+ * Mark an extended statistics object as to be dumped or not
+ *
+ * We dump an extended statistics object if the schema it's in and the table
+ * it's for are being dumped. (This'll need more thought if statistics
+ * objects ever support cross-table stats.)
+ */
+static void
+selectDumpableStatisticsObject(StatsExtInfo *sobj, Archive *fout)
+{
+ if (checkExtensionMembership(&sobj->dobj, fout))
+ return; /* extension membership overrides all else */
+
+ sobj->dobj.dump = sobj->dobj.namespace->dobj.dump_contains;
+ if (sobj->stattable == NULL ||
+ !(sobj->stattable->dobj.dump & DUMP_COMPONENT_DEFINITION))
+ sobj->dobj.dump = DUMP_COMPONENT_NONE;
+}
+
/*
* selectDumpableObject: policy-setting subroutine
* Mark a generic dumpable object as to be dumped or not
@@ -7291,6 +7311,7 @@ getExtendedStatistics(Archive *fout)
int i_stxname;
int i_stxnamespace;
int i_stxowner;
+ int i_stxrelid;
int i_stattarget;
int i;
@@ -7302,11 +7323,11 @@ getExtendedStatistics(Archive *fout)
if (fout->remoteVersion < 130000)
appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
- "stxnamespace, stxowner, (-1) AS stxstattarget "
+ "stxnamespace, stxowner, stxrelid, (-1) AS stxstattarget "
"FROM pg_catalog.pg_statistic_ext");
else
appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
- "stxnamespace, stxowner, stxstattarget "
+ "stxnamespace, stxowner, stxrelid, stxstattarget "
"FROM pg_catalog.pg_statistic_ext");
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -7318,6 +7339,7 @@ getExtendedStatistics(Archive *fout)
i_stxname = PQfnumber(res, "stxname");
i_stxnamespace = PQfnumber(res, "stxnamespace");
i_stxowner = PQfnumber(res, "stxowner");
+ i_stxrelid = PQfnumber(res, "stxrelid");
i_stattarget = PQfnumber(res, "stxstattarget");
statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
@@ -7332,10 +7354,12 @@ getExtendedStatistics(Archive *fout)
statsextinfo[i].dobj.namespace =
findNamespace(atooid(PQgetvalue(res, i, i_stxnamespace)));
statsextinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_stxowner));
+ statsextinfo[i].stattable =
+ findTableByOid(atooid(PQgetvalue(res, i, i_stxrelid)));
statsextinfo[i].stattarget = atoi(PQgetvalue(res, i, i_stattarget));
/* Decide whether we want to dump it */
- selectDumpableObject(&(statsextinfo[i].dobj), fout);
+ selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
}
PQclear(res);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 2fe3cbed9a..673ca5c92d 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -423,7 +423,8 @@ typedef struct _indexAttachInfo
typedef struct _statsExtInfo
{
DumpableObject dobj;
- const char *rolname;
+ const char *rolname; /* owner */
+ TableInfo *stattable; /* link to table the stats are for */
int stattarget; /* statistics target */
} StatsExtInfo;
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index eb3ec534b4..a671603cd2 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -3742,14 +3742,15 @@ my %tests = (
'CREATE STATISTICS extended_stats_no_options' => {
create_order => 97,
create_sql => 'CREATE STATISTICS dump_test.test_ext_stats_no_options
- ON col1, col2 FROM dump_test.test_fifth_table',
+ ON col1, col2 FROM dump_test.test_table',
regexp => qr/^
- \QCREATE STATISTICS dump_test.test_ext_stats_no_options ON col1, col2 FROM dump_test.test_fifth_table;\E
+ \QCREATE STATISTICS dump_test.test_ext_stats_no_options ON col1, col2 FROM dump_test.test_table;\E
/xms,
like =>
{ %full_runs, %dump_test_schema_runs, section_post_data => 1, },
unlike => {
exclude_dump_test_schema => 1,
+ exclude_test_table => 1,
only_dump_measurement => 1,
},
},