From 19d091c74a8a37f6572db3e13f04b222b284e906 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 26 Aug 2025 12:50:27 +0900 Subject: [PATCH v4] fix variable not found in subplan target lists --- src/backend/optimizer/path/equivclass.c | 87 +++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 49 +++++++++++--- src/include/optimizer/paths.h | 2 + src/test/regress/expected/join.out | 62 ++++++++++++++++++ src/test/regress/sql/join.sql | 26 ++++++++ 5 files changed, 218 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 441f12f6c50..bebbc914618 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -2690,6 +2690,93 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, Oid opfamily) return false; } +/* + * expr_known_equal_to_constant + * Detect whether the given expression is known equal to a constant due to + * equivalence relationships. + * + * "sortop" is the OID of the ordering operator. + */ +bool +expr_known_equal_to_constant(PlannerInfo *root, Expr *expr, Oid sortop) +{ + Oid opfamily, + opcintype, + collation; + CompareType cmptype; + Oid equality_op; + List *opfamilies; + ListCell *lc; + + /* Find the operator in pg_amop --- failure shouldn't happen */ + if (!get_ordering_op_properties(sortop, + &opfamily, &opcintype, &cmptype)) + elog(ERROR, "operator %u is not a valid ordering operator", + sortop); + + collation = exprCollation((Node *) expr); + + /* + * EquivalenceClasses need to contain opfamily lists based on the family + * membership of mergejoinable equality operators, which could belong to + * more than one opfamily. So we have to look up the opfamily's equality + * operator and get its membership. + */ + equality_op = get_opfamily_member_for_cmptype(opfamily, + opcintype, + opcintype, + COMPARE_EQ); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + COMPARE_EQ, opcintype, opcintype, opfamily); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for equality operator %u", + equality_op); + + /* + * Ensure the expression exposes the correct type and collation. + */ + expr = canonicalize_ec_expression(expr, opcintype, collation); + + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc); + EquivalenceMemberIterator it; + EquivalenceMember *cur_em; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + + if (collation != cur_ec->ec_collation) + continue; + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + setup_eclass_member_iterator(&it, cur_ec, NULL); + while ((cur_em = eclass_member_iterator_next(&it)) != NULL) + { + /* Ignore child members */ + if (cur_em->em_is_child) + continue; + + if (opcintype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return true; /* Match! */ + } + } + + return false; +} + /* * match_eclasses_to_foreign_key_col diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 65f17101591..1ac9cc869f8 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8284,8 +8284,8 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist) * the needs of the semijoin represented by sjinfo. If it is not possible * to identify how to make the data unique, NULL is returned. * - * If used at all, this is likely to be called repeatedly on the same rel; - * So we cache the result. + * If used at all, this is likely to be called repeatedly on the same rel, + * so we cache the result. */ RelOptInfo * create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) @@ -8294,6 +8294,10 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) List *sortPathkeys = NIL; List *groupClause = NIL; MemoryContext oldcontext; + List *semi_rhs_exprs = NIL; + List *semi_operators = NIL; + ListCell *lc1; + ListCell *lc2; /* Caller made a mistake if SpecialJoinInfo is the wrong one */ Assert(sjinfo->jointype == JOIN_SEMI); @@ -8307,6 +8311,39 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash)) return NULL; + /* + * To complicate matters, some of the values to be unique-ified may be + * known redundant by the EquivalenceClass machinery (e.g., because they + * have been equated to constants). There is no need to compare such + * values during unique-ification, and indeed we had better not try + * because the Vars involved may not have propagated as high as the + * semijoin's level. + */ + forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators) + { + Expr *uniqexpr = lfirst(lc1); + Oid in_oper = lfirst_oid(lc2); + Oid sortop; + + sortop = get_ordering_op_for_equality_op(in_oper, false); + + if (OidIsValid(sortop)) + { + if (expr_known_equal_to_constant(root, uniqexpr, sortop)) + continue; + } + else if (sjinfo->semi_can_btree) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); + + semi_rhs_exprs = lappend(semi_rhs_exprs, uniqexpr); + semi_operators = lappend_oid(semi_operators, in_oper); + } + + /* If there are no columns left to unique-ify, return NULL. */ + if (semi_rhs_exprs == NIL) + return NULL; + /* * When called during GEQO join planning, we are in a short-lived memory * context. We must make sure that the unique rel and any subsidiary data @@ -8367,8 +8404,6 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) List *newtlist; int nextresno; List *sortList = NIL; - ListCell *lc1; - ListCell *lc2; /* * The values we are supposed to unique-ify may be expressions in the @@ -8382,7 +8417,7 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) newtlist = make_tlist_from_pathtarget(rel->reltarget); nextresno = list_length(newtlist) + 1; - forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators) + forboth(lc1, semi_rhs_exprs, lc2, semi_operators) { Expr *uniqexpr = lfirst(lc1); Oid in_oper = lfirst_oid(lc2); @@ -8407,9 +8442,7 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) SortGroupClause *sortcl; sortop = get_ordering_op_for_equality_op(in_oper, false); - if (!OidIsValid(sortop)) /* shouldn't happen */ - elog(ERROR, "could not find ordering operator for equality operator %u", - in_oper); + Assert(OidIsValid(sortop)); /* * The Unique node will need equality operators. Normally diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index cbade77b717..db47bb88eb1 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -159,6 +159,8 @@ extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root, RelOptInfo *inner_rel); extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, Oid opfamily); +extern bool expr_known_equal_to_constant(PlannerInfo *root, Expr *expr, + Oid sortop); extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root, ForeignKeyOptInfo *fkinfo, int colno); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 98b05c94a11..054bcb20596 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6500,6 +6500,68 @@ where t1.a = s.c; ---------- (0 rows) +rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; +create temp table t (a int unique, b int); +insert into t values (1, 2); +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + QUERY PLAN +------------------------------------------------------------------------------------ + Nested Loop + Output: t1.a + Inner Unique: true + -> Nested Loop + Output: t1.a, t5.a + -> Index Only Scan using t_a_key on pg_temp.t t1 + Output: t1.a + Index Cond: (t1.a = 1) + -> HashAggregate + Output: t5.a + Group Key: t5.a + -> Hash Join + Output: t5.a + Hash Cond: (t6.b = t4.b) + -> Seq Scan on pg_temp.t t6 + Output: t6.a, t6.b + -> Hash + Output: t4.b, t5.b, t5.a + -> Hash Join + Output: t4.b, t5.b, t5.a + Inner Unique: true + Hash Cond: (t5.b = t4.b) + -> Seq Scan on pg_temp.t t5 + Output: t5.a, t5.b + -> Hash + Output: t4.b, t4.a + -> Index Scan using t_a_key on pg_temp.t t4 + Output: t4.b, t4.a + Index Cond: (t4.a = 1) + -> Index Only Scan using t_a_key on pg_temp.t t3 + Output: t3.a + Index Cond: (t3.a = t5.a) +(32 rows) + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + a +--- + 1 +(1 row) + rollback; -- test cases where we can remove a join, but not a PHV computed at it begin; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5f0a475894d..817132d90de 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2420,6 +2420,32 @@ where t1.a = s.c; rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; + +create temp table t (a int unique, b int); +insert into t values (1, 2); + +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +rollback; + -- test cases where we can remove a join, but not a PHV computed at it begin; -- 2.43.0