From 1a7fd763d5863be392b8b73e932c0145714cdbe7 Mon Sep 17 00:00:00 2001 From: hanweibo Date: Wed, 7 Aug 2024 09:25:34 +0800 Subject: [PATCH v4] fix group_by_has_partkey bug and add regress test case --- src/backend/optimizer/plan/planner.c | 24 +++++++-- .../regress/expected/partition_aggregate.out | 53 +++++++++++++++++++ src/test/regress/sql/partition_aggregate.sql | 29 ++++++++++ 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 948afd9094..88a8937b43 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -7993,6 +7993,10 @@ create_partitionwise_grouping_paths(PlannerInfo *root, } } +/* XXX see PartCollMatchesExprColl */ +#define PartKeyCollMatchesExprColl(partcoll, exprcoll) \ + ((partcoll) == InvalidOid || (partcoll) == (exprcoll)) + /* * group_by_has_partkey * @@ -8025,13 +8029,27 @@ group_by_has_partkey(RelOptInfo *input_rel, foreach(lc, partexprs) { + ListCell *lg; Expr *partexpr = lfirst(lc); + Oid partcoll = input_rel->part_scheme->partcollation[cnt]; - if (list_member(groupexprs, partexpr)) + foreach (lg, groupexprs) { - found = true; - break; + Expr *groupexpr = lfirst(lg); + Oid groupexpr_coll = exprCollation((Node *) groupexpr); + + /* + * Partition key match also requires collation match. + */ + if (equal(groupexpr, partexpr) && + PartKeyCollMatchesExprColl(partcoll, groupexpr_coll)) + { + found = true; + break; + } } + if (found) + break; } /* diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 5f2c0cf578..bc4a1987a1 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -1518,3 +1518,56 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) +-- Add collation check between groupkey and partkey. +-- If groupkey's collation is not equal to partkey's, we cannot use PARTITIONWISE_AGGREGATE_FULL. +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) +1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive + -> Finalize GroupAggregate + Group Key: pagg_tab3.c + -> Sort + Sort Key: pagg_tab3.c COLLATE case_insensitive + -> Append + -> Partial HashAggregate + Group Key: pagg_tab3.c + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> Partial HashAggregate + Group Key: pagg_tab3_1.c + -> Seq Scan on pagg_tab3_p3 pagg_tab3_1 + -> Partial HashAggregate + Group Key: pagg_tab3_2.c + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(16 rows) + +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + upper | count +-------+------- + A | 600 + B | 600 + C | 600 + D | 600 + E | 600 +(5 rows) + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index ab070fee24..8f2de9ee96 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -334,3 +334,32 @@ RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + + +-- Add collation check between groupkey and partkey. +-- If groupkey's collation is not equal to partkey's, we cannot use PARTITIONWISE_AGGREGATE_FULL. +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; + +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); + +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) +1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; -- 2.34.1