From f3bd854e7ee04653a162c8980171fb4591813901 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Fri, 1 Nov 2024 15:32:33 +0900 Subject: [PATCH v5 2/2] Disallow partitionwise join when collation doesn't match Insist that the collation used for joining matches exactly with the collation used for partitioning to allow using partitionwise join. Reported-by: Tender Wang Author: Jian He Author: Amit Langote Reviewed-by: Tender Wang Reviewed-by: Junwang Zhao Discussion: https://postgr.es/m/18568-2a9afb6b9f7e6ed3@postgresql.org Discussion: https://postgr.es/m/tencent_9D9103CDA420C07768349CC1DFF88465F90A@qq.com Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com --- src/backend/optimizer/util/relnode.c | 28 +++- .../regress/expected/collate.icu.utf8.out | 154 ++++++++++++++++++ src/test/regress/sql/collate.icu.utf8.sql | 35 ++++ 3 files changed, 215 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index d7266e4cdb..489fe6c26f 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -2185,6 +2185,10 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, if (pk_known_equal[ipk1]) continue; + /* Reject if the partition key collation differs from the clause's. */ + if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid) + return false; + /* * The clause allows partitionwise join only if it uses the same * operator family as that specified by the partition key. @@ -2258,6 +2262,8 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, { Node *expr1 = (Node *) lfirst(lc); ListCell *lc2; + Oid partcoll1 = rel1->part_scheme->partcollation[ipk]; + Oid exprcoll1 = exprCollation(expr1); foreach(lc2, rel2->partexprs[ipk]) { @@ -2265,8 +2271,26 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel, if (exprs_known_equal(root, expr1, expr2, btree_opfamily)) { - pk_known_equal[ipk] = true; - break; + /* + * Ensure that the collation of the expression matches + * that of the partition key. Checking just one collation + * (partcoll1 and exprcoll1) suffices because partcoll1 + * and partcoll2, as well as exprcoll1 and exprcoll2, + * should be identical. This holds because both rel1 and + * rel2 use the same PartitionScheme and expr1 and expr2 + * are equal. + */ + if (partcoll1 == exprcoll1) + { + Oid partcoll2 PG_USED_FOR_ASSERTS_ONLY = + rel1->part_scheme->partcollation[ipk]; + Oid exprcoll2 PG_USED_FOR_ASSERTS_ONLY = + exprCollation(expr2); + + Assert(partcoll2 == exprcoll2); + pk_known_equal[ipk] = true; + break; + } } } if (pk_known_equal[ipk]) diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 1ec9a2548b..e6af9b405f 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -2164,7 +2164,161 @@ SELECT a, count(c collate "C") FROM pagg_tab3 GROUP BY a ORDER BY 1; 4 | 3 (4 rows) +-- Partitionwise join should not be allowed too when the collation used by the +-- join keys doesn't match the partition key collation. +SET enable_partitionwise_join TO false; +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.c COLLATE case_insensitive + -> HashAggregate + Group Key: t1.c + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Append + -> Seq Scan on pagg_tab3_p2 t1_1 + -> Seq Scan on pagg_tab3_p1 t1_2 + -> Hash + -> Append + -> Seq Scan on pagg_tab3_p2 t2_1 + -> Seq Scan on pagg_tab3_p1 t2_2 +(13 rows) + +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + c | count +---+------- + A | 36 + B | 36 +(2 rows) + +SET enable_partitionwise_join TO true; +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.c COLLATE case_insensitive + -> HashAggregate + Group Key: t1.c + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Append + -> Seq Scan on pagg_tab3_p2 t1_1 + -> Seq Scan on pagg_tab3_p1 t1_2 + -> Hash + -> Append + -> Seq Scan on pagg_tab3_p2 t2_1 + -> Seq Scan on pagg_tab3_p1 t2_2 +(13 rows) + +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + c | count +---+------- + A | 36 + B | 36 +(2 rows) + +-- OK when the join clause uses the same collation as the partition key. +EXPLAIN (COSTS OFF) +SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY 1; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: ((t1.c)::text) COLLATE "C" + -> Append + -> HashAggregate + Group Key: (t1.c)::text + -> Hash Join + Hash Cond: ((t1.c)::text = (t2.c)::text) + -> Seq Scan on pagg_tab3_p2 t1 + -> Hash + -> Seq Scan on pagg_tab3_p2 t2 + -> HashAggregate + Group Key: (t1_1.c)::text + -> Hash Join + Hash Cond: ((t1_1.c)::text = (t2_1.c)::text) + -> Seq Scan on pagg_tab3_p1 t1_1 + -> Hash + -> Seq Scan on pagg_tab3_p1 t2_1 +(17 rows) + +SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY 1; + c | count +---+------- + A | 9 + B | 9 + a | 9 + b | 9 +(4 rows) + +-- Another case where the partition keys are matched via equivalence class, +-- not a join restriction clause. +CREATE TABLE pagg_tab4 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST(b collate "C"); +CREATE TABLE pagg_tab4_p1 PARTITION OF pagg_tab4 FOR VALUES IN ('a', 'b'); +CREATE TABLE pagg_tab4_p2 PARTITION OF pagg_tab4 FOR VALUES IN ('B', 'A'); +INSERT INTO pagg_tab4 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 11) i; +ANALYZE pagg_tab4; +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY 1; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.c COLLATE case_insensitive + -> HashAggregate + Group Key: t1.c + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Append + -> Seq Scan on pagg_tab3_p2 t1_1 + -> Seq Scan on pagg_tab3_p1 t1_2 + -> Hash + -> Append + -> Seq Scan on pagg_tab4_p2 t2_1 + Filter: (c = b) + -> Seq Scan on pagg_tab4_p1 t2_2 + Filter: (c = b) +(15 rows) + +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY 1; + c | count +---+------- + A | 36 + B | 36 +(2 rows) + +-- OK when the join clause uses the same collation as the partition key +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b COLLATE "C" GROUP BY 1 ORDER BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------ + Sort + Sort Key: t1.c COLLATE case_insensitive + -> HashAggregate + Group Key: t1.c + -> Append + -> Hash Join + Hash Cond: ((t1_1.c = t2_1.c) AND ((t1_1.c)::text = (t2_1.b)::text)) + -> Seq Scan on pagg_tab3_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab4_p2 t2_1 + -> Hash Join + Hash Cond: ((t1_2.c = t2_2.c) AND ((t1_2.c)::text = (t2_2.b)::text)) + -> Seq Scan on pagg_tab3_p1 t1_2 + -> Hash + -> Seq Scan on pagg_tab4_p1 t2_2 +(15 rows) + +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b COLLATE "C" GROUP BY 1 ORDER BY 1; + c | count +---+------- + A | 18 + B | 18 +(2 rows) + DROP TABLE pagg_tab3; +DROP TABLE pagg_tab4; RESET enable_partitionwise_aggregate; RESET max_parallel_workers_per_gather; RESET enable_incremental_sort; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 792bc94361..407e5e5b21 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -834,7 +834,42 @@ EXPLAIN (COSTS OFF) SELECT a, count(c collate "C") FROM pagg_tab3 GROUP BY a ORDER BY 1; SELECT a, count(c collate "C") FROM pagg_tab3 GROUP BY a ORDER BY 1; +-- Partitionwise join should not be allowed too when the collation used by the +-- join keys doesn't match the partition key collation. +SET enable_partitionwise_join TO false; +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + +SET enable_partitionwise_join TO true; +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY 1; + +-- OK when the join clause uses the same collation as the partition key. +EXPLAIN (COSTS OFF) +SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY 1; +SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY 1; + +-- Another case where the partition keys are matched via equivalence class, +-- not a join restriction clause. +CREATE TABLE pagg_tab4 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST(b collate "C"); +CREATE TABLE pagg_tab4_p1 PARTITION OF pagg_tab4 FOR VALUES IN ('a', 'b'); +CREATE TABLE pagg_tab4_p2 PARTITION OF pagg_tab4 FOR VALUES IN ('B', 'A'); +INSERT INTO pagg_tab4 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 11) i; +ANALYZE pagg_tab4; + +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY 1; +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY 1; + +-- OK when the join clause uses the same collation as the partition key +EXPLAIN (COSTS OFF) +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b COLLATE "C" GROUP BY 1 ORDER BY 1; +SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b COLLATE "C" GROUP BY 1 ORDER BY 1; + DROP TABLE pagg_tab3; +DROP TABLE pagg_tab4; RESET enable_partitionwise_aggregate; RESET max_parallel_workers_per_gather; -- 2.43.0