From 2d584f9c67ce6d4e4631f4d9358975bc8ce9e85e Mon Sep 17 00:00:00 2001 From: Tender Wang Date: Wed, 23 Oct 2024 21:41:40 +0800 Subject: [PATCH] Fix wrong result due different collation between columan and partition key. --- src/backend/optimizer/util/plancat.c | 2 +- .../regress/expected/partition_aggregate.out | 75 +++++++++++++++++++ src/test/regress/sql/partition_aggregate.sql | 31 ++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b913f91ff0..a96a4396f6 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2537,7 +2537,7 @@ set_baserel_partition_key_exprs(Relation relation, partexpr = (Expr *) makeVar(varno, attno, partkey->parttypid[cnt], partkey->parttypmod[cnt], - partkey->parttypcoll[cnt], 0); + partkey->partcollation[cnt], 0); } else { diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 5f2c0cf578..4f10066c48 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -1518,3 +1518,78 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 21 | 6000 | 6.0000000000000000 | 1000 (6 rows) +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) + 1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive + -> Finalize GroupAggregate + Group Key: pagg_tab3.c + -> Sort + Sort Key: pagg_tab3.c COLLATE case_insensitive + -> Append + -> Partial HashAggregate + Group Key: pagg_tab3.c + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> Partial HashAggregate + Group Key: pagg_tab3_1.c + -> Seq Scan on pagg_tab3_p3 pagg_tab3_1 + -> Partial HashAggregate + Group Key: pagg_tab3_2.c + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(16 rows) + +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + upper | count +-------+------- + A | 600 + B | 600 + C | 600 + D | 600 + E | 600 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 on t1.c = t2.c; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Append + -> Seq Scan on pagg_tab3_p2 t1_1 + -> Seq Scan on pagg_tab3_p3 t1_2 + -> Seq Scan on pagg_tab3_p1 t1_3 + -> Hash + -> Append + -> Seq Scan on pagg_tab3_p2 t2_1 + -> Seq Scan on pagg_tab3_p3 t2_2 + -> Seq Scan on pagg_tab3_p1 t2_3 +(12 rows) + +SELECT count(*) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 on t1.c = t2.c; + count +--------- + 1800000 +(1 row) + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index ab070fee24..3c0c65de67 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -334,3 +334,34 @@ RESET parallel_setup_cost; EXPLAIN (COSTS OFF) SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + +SET enable_partitionwise_aggregate TO true; +SET enable_partitionwise_join TO true; +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; + +CREATE COLLATION case_insensitive ( + provider = icu, + locale = 'und-u-ks-level2', + deterministic = false +); + +CREATE TABLE pagg_tab3 (c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b', 'c', 'd'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('e', 'f', 'A'); +CREATE TABLE pagg_tab3_p3 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'C', 'D', 'E'); +INSERT INTO pagg_tab3 SELECT substr('abcdeABCDE', (i % 10) + 1 , 1) FROM generate_series(0, 2999) i; +ANALYZE pagg_tab3; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 on t1.c = t2.c; + +SELECT count(*) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 on t1.c = t2.c; + +RESET enable_partitionwise_aggregate; +RESET enable_partitionwise_join; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; \ No newline at end of file -- 2.25.1