From 11601360a4471cd6fafffb91e64b6c34d2b99c1c Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Thu, 28 Dec 2023 18:52:11 +0800 Subject: [PATCH v1] Fix the issue that SJE mistakenly omits qual clauses When the SJE code handles the transfer of qual clauses from the removed relation to the remaining one, it replaces the Vars of the removed relation with the Vars of the remaining relation for each clause, and then reintegrates these clauses into the appropriate restriction or join clause lists, while attempting to avoid duplicates. However, the code compares RestrictInfo->clause to determine if two clauses are duplicates. This is just flat wrong. Two RestrictInfos with the same clause can have different required_relids, incompatible_relids, is_pushed_down and so on. This can cause qual clauses to be mistakenly omited, leading to wrong result. This patch fixes it by comparing the entire RestrictInfos not just their clauses. To make it work, this patch makes 'rinfo_serial' equal_ignore. --- src/backend/optimizer/plan/analyzejoins.c | 4 ++-- src/include/nodes/pathnodes.h | 2 +- src/test/regress/expected/join.out | 27 ++++++++++++++++++++++- src/test/regress/sql/join.sql | 18 ++++++++++++++- 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 80739451b7..1f035f2743 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -1760,7 +1760,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || equal(rinfo, src)) { is_redundant = true; break; @@ -1788,7 +1788,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, if (src == rinfo || (rinfo->parent_ec != NULL && src->parent_ec == rinfo->parent_ec) - || equal(rinfo->clause, src->clause)) + || equal(rinfo, src)) { is_redundant = true; break; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index ed85dc7414..c791ca0c3f 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2595,7 +2595,7 @@ typedef struct RestrictInfo * of redundant pushed-down equality clauses. *---------- */ - int rinfo_serial; + int rinfo_serial pg_node_attr(equal_ignore); /* * Generating EquivalenceClass. This field is NULL unless clause is diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1557e17299..45259e08e2 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6742,7 +6742,7 @@ explain (costs off) select 1 from reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; QUERY PLAN @@ -6868,6 +6868,31 @@ select * from emp1 t1 -> Seq Scan on emp1 t3 (6 rows) +-- Check that SJE does not mistakenly omit qual clauses +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + ?column? +---------- +(0 rows) + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index fed9e83e31..b48eb701c8 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2568,7 +2568,7 @@ reset join_collapse_limit; reset enable_seqscan; -- Check that clauses from the join filter list is not lost on the self-join removal -CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int); +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); explain (verbose, costs off) SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; @@ -2616,6 +2616,22 @@ select * from emp1 t1 inner join emp1 t2 on t1.id = t2.id left join emp1 t3 on t1.id > 1 and t1.id < 2; +-- Check that SJE does not mistakenly omit qual clauses +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + -- We can remove the join even if we find the join can't duplicate rows and -- the base quals of each side are different. In the following case we end up -- moving quals over to s1 to make it so it can't match any rows. -- 2.31.0