From b4fa71bef855976d65427010dbf71fdf872a609b Mon Sep 17 00:00:00 2001 From: hanweibo Date: Thu, 8 Aug 2024 19:17:18 +0800 Subject: [PATCH] fix-group_by_has_partkey-bug-and-add-regress-test --- src/backend/optimizer/plan/planner.c | 25 +++- .../regress/expected/partition_aggregate.out | 124 ++++++++++++++++++ src/test/regress/sql/partition_aggregate.sql | 40 ++++++ 3 files changed, 186 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 948afd9094..d04e622c24 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8025,13 +8025,32 @@ group_by_has_partkey(RelOptInfo *input_rel, foreach(lc, partexprs) { + ListCell *lg; Expr *partexpr = lfirst(lc); + Oid partcoll = input_rel->part_scheme->partcollation[cnt]; - if (list_member(groupexprs, partexpr)) + foreach (lg, groupexprs) { - found = true; - break; + Expr *groupexpr = lfirst(lg); + Oid groupcoll = exprCollation((Node *) groupexpr); + + /* + * GROUP BY xx COLLATE xxx + * Set collation in group by clause using COLLATE syntax, + * AND the collation IS NOT equal to column's default collation. + */ + if (IsA(groupexpr, RelabelType)) + groupexpr = ((RelabelType *) groupexpr)->arg; + + if (equal(groupexpr, partexpr) && partcoll == groupcoll) + { + found = true; + break; + } } + + if (found) + break; } /* diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 5f2c0cf578..38cc35f9f3 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -1518,3 +1518,127 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) +-- Add collation check between groupkey and partkey. +-- If groupkey's collation is not equal to partkey's, we cannot use PARTITIONWISE_AGGREGATE_FULL. +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) +1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive + -> Finalize GroupAggregate + Group Key: pagg_tab3.c + -> Sort + Sort Key: pagg_tab3.c COLLATE case_insensitive + -> Append + -> Partial HashAggregate + Group Key: pagg_tab3.c + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> Partial HashAggregate + Group Key: pagg_tab3_1.c + -> Seq Scan on pagg_tab3_p3 pagg_tab3_1 + -> Partial HashAggregate + Group Key: pagg_tab3_2.c + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(16 rows) + +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + upper | count +-------+------- + A | 600 + B | 600 + C | 600 + D | 600 + E | 600 +(5 rows) + +-- group key's collation not equal to column's, but equal to partkey's. We can also use PARTITIONWISE_AGGREGATE_FULL. +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + QUERY PLAN +-------------------------------------------------------- + Sort + Sort Key: ((pagg_tab3.c)::text) COLLATE "C" + -> Append + -> HashAggregate + Group Key: (pagg_tab3.c)::text + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> HashAggregate + Group Key: (pagg_tab3_1.c)::text + -> Seq Scan on pagg_tab3_p3 pagg_tab3_1 + -> HashAggregate + Group Key: (pagg_tab3_2.c)::text + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(12 rows) + +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + c | count +---+------- + A | 300 + B | 300 + C | 300 + D | 300 + E | 300 + a | 300 + b | 300 + c | 300 + d | 300 + e | 300 +(10 rows) + +-- pushdown having filter to sub table to improve performance +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" HAVING count(c) > 1 ORDER BY 1; + QUERY PLAN +-------------------------------------------------------- + Sort + Sort Key: ((pagg_tab3.c)::text) COLLATE "C" + -> Append + -> HashAggregate + Group Key: (pagg_tab3.c)::text + Filter: (count(pagg_tab3.c) > 1) + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> HashAggregate + Group Key: (pagg_tab3_1.c)::text + Filter: (count(pagg_tab3_1.c) > 1) + -> Seq Scan on pagg_tab3_p3 pagg_tab3_1 + -> HashAggregate + Group Key: (pagg_tab3_2.c)::text + Filter: (count(pagg_tab3_2.c) > 1) + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(15 rows) + +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" HAVING count(c) > 1 ORDER BY 1; + c | count +---+------- + A | 300 + B | 300 + C | 300 + D | 300 + E | 300 + a | 300 + b | 300 + c | 300 + d | 300 + e | 300 +(10 rows) + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index ab070fee24..007d677198 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -334,3 +334,43 @@ RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + + +-- Add collation check between groupkey and partkey. +-- If groupkey's collation is not equal to partkey's, we cannot use PARTITIONWISE_AGGREGATE_FULL. +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; + +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); + +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) +1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; + +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + +-- group key's collation not equal to column's, but equal to partkey's. We can also use PARTITIONWISE_AGGREGATE_FULL. +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + +-- pushdown having filter to sub table to improve performance +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" HAVING count(c) > 1 ORDER BY 1; +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" HAVING count(c) > 1 ORDER BY 1; + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; -- 2.25.1