From ff0920780902a9b334297d8272d9c13b11e6a5ae Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Fri, 29 Dec 2023 22:14:05 +0800 Subject: [PATCH] Remove useless GROUP BY columns considering unique index. Before this commit, we try to remove any columns in the GROUP BY clause that are redundant due to being functionally dependent on other GROUP BY columns(primary key). But unique index whose columns all have NOT NULL constraint could also take the work. create temp table t2 (a int, b int, c int not null, primary key (a, b), unique(c)); explain (costs off) select * from t2 group by a,b,c; QUERY PLAN ---------------------- HashAggregate Group Key: c -> Seq Scan on t2 There may be multiple unique index candidates, choose the one with least column numbers so that we will drop more useless columns. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/catalog/pg_constraint.c | 113 ++++++++++++++++++++++- src/backend/commands/tablecmds.c | 2 +- src/backend/optimizer/plan/planner.c | 28 +++++- src/backend/parser/parse_utilcmd.c | 4 +- src/include/catalog/pg_constraint.h | 4 +- src/test/regress/expected/aggregates.out | 58 ++++++++++++ src/test/regress/sql/aggregates.sql | 30 ++++++ 7 files changed, 229 insertions(+), 10 deletions(-) diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index e9d4d6006e..9c579d4214 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -788,11 +788,11 @@ AdjustNotNullInheritance(Oid relid, Bitmapset *columns, int count) * * This is seldom needed, so we just scan pg_constraint each time. * - * XXX This is only used to create derived tables, so NO INHERIT constraints - * are always skipped. + * XXX When used to create derived tables, set skip_no_inherit tp true, + * so that NO INHERIT constraints will be skipped. */ List * -RelationGetNotNullConstraints(Oid relid, bool cooked) +RelationGetNotNullConstraints(Oid relid, bool cooked, bool skip_no_inherit) { List *notnulls = NIL; Relation constrRel; @@ -815,7 +815,7 @@ RelationGetNotNullConstraints(Oid relid, bool cooked) if (conForm->contype != CONSTRAINT_NOTNULL) continue; - if (conForm->connoinherit) + if (skip_no_inherit && conForm->connoinherit) continue; colnum = extractNotNullColumn(htup); @@ -1658,3 +1658,108 @@ check_functional_grouping(Oid relid, return false; } + +/* + * get_min_unique_not_null_attnos + * + * Identify the columns in a relation's unique index whose columns all have + * the NOT NULL constraint, if any. + * If there is more than one, return the one with the least column numbers. + */ +Bitmapset* +get_min_unique_not_null_attnos(Oid relid) +{ + List *not_null_attnos = NIL; + ListCell *lc = NULL; + Bitmapset *unique_notnull_attnos = NULL; + Relation pg_constraint; + HeapTuple tuple; + SysScanDesc scan; + ScanKeyData skey[1]; + int min_numkeys = 0; + + /* Use cooked to fetch attnos. */ + List* not_null_cs = RelationGetNotNullConstraints(relid, true, false); + if (not_null_cs == NIL) + return unique_notnull_attnos; + /* Fill a attno array to use it easily */ + foreach (lc, not_null_cs) + { + CookedConstraint* cc = lfirst(lc); + not_null_attnos = list_append_unique_int(not_null_attnos, cc->attnum); + } + + /* Scan pg_constraint for constraints of the target rel */ + pg_constraint = table_open(ConstraintRelationId, AccessShareLock); + + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_constraint, ConstraintRelidTypidNameIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + Datum adatum; + bool isNull; + ArrayType *arr; + int16 *attnums; + int numkeys; + int i; + bool all_notnull; + Bitmapset *candidate = NULL; + + /* Skip constraints that are not UNIQUE */ + if (con->contype != CONSTRAINT_UNIQUE) + continue; + + if (con->condeferrable) + break; + + /* Extract the conkey array, ie, attnums of UNIQUE's columns */ + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(pg_constraint), &isNull); + if (isNull) + elog(ERROR, "null conkey for constraint %u", + ((Form_pg_constraint) GETSTRUCT(tuple))->oid); + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + attnums = (int16 *) ARR_DATA_PTR(arr); + + /* Skip if it has more columns than current one. */ + if (min_numkeys > 0 && numkeys >= min_numkeys) + continue; + + all_notnull = true; + /* Construct the result value */ + for (i = 0; i < numkeys; i++) + { + /* Skip if unique key could be NULL. */ + if (!list_member_int(not_null_attnos, attnums[i])) + { + all_notnull = false; + break; + } + + candidate = bms_add_member(candidate, + attnums[i] - FirstLowInvalidHeapAttributeNumber); + } + if(!all_notnull) + continue; + min_numkeys = numkeys; + unique_notnull_attnos = candidate; + } + systable_endscan(scan); + + table_close(pg_constraint, AccessShareLock); + + return unique_notnull_attnos; +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6b0a20010e..8797386326 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -2682,7 +2682,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence, */ pkattrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_PRIMARY_KEY); - nnconstrs = RelationGetNotNullConstraints(RelationGetRelid(relation), true); + nnconstrs = RelationGetNotNullConstraints(RelationGetRelid(relation), true, true); foreach(lc1, nnconstrs) nncols = bms_add_member(nncols, ((CookedConstraint *) lfirst(lc1))->attnum); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6f45efde21..d7929c9ef6 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2718,6 +2718,8 @@ remove_useless_groupby_columns(PlannerInfo *root) Bitmapset *relattnos; Bitmapset *pkattnos; Oid constraintOid; + Bitmapset *unique_notnull_attnos; + bool use_pkattnos; relid++; @@ -2743,14 +2745,26 @@ remove_useless_groupby_columns(PlannerInfo *root) * (i.e., nondeferrable) primary key constraint. */ pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid); - if (pkattnos == NULL) + /* Check if there is unique not null index. */ + unique_notnull_attnos = get_min_unique_not_null_attnos(rte->relid); + if ((pkattnos == NULL) && (unique_notnull_attnos == NULL)) continue; + /* + * Choose primary key or unique index to remove useless group by columns. + * If Both primary key and unique not null index exist, choose the one with less attnum. + * Primary key is preferred if their column num are equal. + */ + if (pkattnos && unique_notnull_attnos) + use_pkattnos = bms_num_members(unique_notnull_attnos) >= bms_num_members(pkattnos); + else + use_pkattnos = (pkattnos != NULL); + /* * If the primary key is a proper subset of relattnos then we have * some items in the GROUP BY that can be removed. */ - if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1) + if (use_pkattnos && bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1) { /* * To easily remember whether we've found anything to do, we don't @@ -2763,6 +2777,16 @@ remove_useless_groupby_columns(PlannerInfo *root) /* Remember the attnos of the removable columns */ surplusvars[relid] = bms_difference(relattnos, pkattnos); } + + /* Find removable columns using unique not null index. */ + if (!use_pkattnos && bms_subset_compare(unique_notnull_attnos, relattnos) == BMS_SUBSET1) + { + if (surplusvars == NULL) + surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + (list_length(parse->rtable) + 1)); + + surplusvars[relid] = bms_difference(relattnos, unique_notnull_attnos); + } } /* diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index cf0d432ab1..fca88f6046 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1195,7 +1195,7 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla * constraints in expandTableLikeClause, so that we skip this for * those. */ - foreach(lc, RelationGetNotNullConstraints(RelationGetRelid(relation), true)) + foreach(lc, RelationGetNotNullConstraints(RelationGetRelid(relation), true, true)) { CookedConstraint *cooked = (CookedConstraint *) lfirst(lc); @@ -1473,7 +1473,7 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause) * Copy not-null constraints, too (these do not require any option to have * been given). */ - foreach(lc, RelationGetNotNullConstraints(RelationGetRelid(relation), false)) + foreach(lc, RelationGetNotNullConstraints(RelationGetRelid(relation), false, true)) { AlterTableCmd *atsubcmd; diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h index a026b42515..45c86dae4d 100644 --- a/src/include/catalog/pg_constraint.h +++ b/src/include/catalog/pg_constraint.h @@ -251,7 +251,7 @@ extern AttrNumber extractNotNullColumn(HeapTuple constrTup); extern bool AdjustNotNullInheritance1(Oid relid, AttrNumber attnum, int count, bool is_no_inherit); extern void AdjustNotNullInheritance(Oid relid, Bitmapset *columns, int count); -extern List *RelationGetNotNullConstraints(Oid relid, bool cooked); +extern List *RelationGetNotNullConstraints(Oid relid, bool cooked, bool skip_no_inherit); extern void RemoveConstraintById(Oid conId); extern void RenameConstraintById(Oid conId, const char *newname); @@ -269,6 +269,8 @@ extern Oid get_relation_idx_constraint_oid(Oid relationId, Oid indexId); extern Bitmapset *get_primary_key_attnos(Oid relid, bool deferrableOk, Oid *constraintOid); +extern Bitmapset *get_min_unique_not_null_attnos(Oid relid); + extern void DeconstructFkConstraintRow(HeapTuple tuple, int *numfks, AttrNumber *conkey, AttrNumber *confkey, Oid *pf_eq_oprs, Oid *pp_eq_oprs, Oid *ff_eq_oprs, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index d8271da4d1..3186f2f7d7 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1398,6 +1398,64 @@ drop table t2; drop table t3; drop table p_t1; -- +-- Test removal of redundant GROUP BY columns using unique not null index. +-- materialized view +-- +create temp table t1 (a int, b int, c int, primary key (a, b), unique(c)); +create temp table t2 (a int, b int, c int not null, primary key (a, b), unique(c)); +create temp table t3 (a int, b int, c int not null, d int not null, primary key (a, b), unique(c, d)); +create temp table t4 (a int, b int not null, c int not null, d int not null, primary key (a, b), unique(b, c), unique(d)); +create temp table t5 (a int not null, b int not null, c int not null, d int not null, unique (a, b), unique(b, c), unique(a, c, d)); +-- Test unique index without not null constraint should not be used. +explain (costs off) select * from t1 group by a,b,c; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t1 +(3 rows) + +-- Test unique not null index beats primary key. +explain (costs off) select * from t2 group by a,b,c; + QUERY PLAN +---------------------- + HashAggregate + Group Key: c + -> Seq Scan on t2 +(3 rows) + +-- Test primary key beats unique not null index. +explain (costs off) select * from t3 group by a,b,c,d; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t3 +(3 rows) + +-- Test unique not null index with less columns wins. +explain (costs off) select * from t4 group by a,b,c,d; + QUERY PLAN +---------------------- + HashAggregate + Group Key: d + -> Seq Scan on t4 +(3 rows) + +-- Test unique not null indices have overlap. +explain (costs off) select * from t5 group by a,b,c,d; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t5 +(3 rows) + +drop table t1; +drop table t2; +drop table t3; +drop table t4; +-- -- Test GROUP BY matching of join columns that are type-coerced due to USING -- create temp table t1(f1 int, f2 int); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 75c78be640..2337281566 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -496,6 +496,36 @@ drop table t2; drop table t3; drop table p_t1; +-- +-- Test removal of redundant GROUP BY columns using unique not null index. +-- materialized view +-- +create temp table t1 (a int, b int, c int, primary key (a, b), unique(c)); +create temp table t2 (a int, b int, c int not null, primary key (a, b), unique(c)); +create temp table t3 (a int, b int, c int not null, d int not null, primary key (a, b), unique(c, d)); +create temp table t4 (a int, b int not null, c int not null, d int not null, primary key (a, b), unique(b, c), unique(d)); +create temp table t5 (a int not null, b int not null, c int not null, d int not null, unique (a, b), unique(b, c), unique(a, c, d)); + +-- Test unique index without not null constraint should not be used. +explain (costs off) select * from t1 group by a,b,c; + +-- Test unique not null index beats primary key. +explain (costs off) select * from t2 group by a,b,c; + +-- Test primary key beats unique not null index. +explain (costs off) select * from t3 group by a,b,c,d; + +-- Test unique not null index with less columns wins. +explain (costs off) select * from t4 group by a,b,c,d; + +-- Test unique not null indices have overlap. +explain (costs off) select * from t5 group by a,b,c,d; + +drop table t1; +drop table t2; +drop table t3; +drop table t4; + -- -- Test GROUP BY matching of join columns that are type-coerced due to USING -- -- 2.34.1