diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 7eec3f3..5350ab5 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than + * 1 inner row will match a single outer row, we'll + * consider returning the first match, but after that we're + * done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -469,6 +471,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) hjstate); hjstate->js.jointype = node->join.jointype; + hjstate->js.unique_inner = node->join.unique_inner; hjstate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) hjstate); diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index fdf2f4c..6122b47 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -832,10 +832,12 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than + * 1 inner row will match a single outer row, we'll + * consider returning the first match, but after that we're + * done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner ) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1503,6 +1505,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) mergestate); mergestate->js.jointype = node->join.jointype; + mergestate->js.unique_inner = node->join.unique_inner; mergestate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) mergestate); diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 6cdd4ff..a89be06 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -247,10 +247,11 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than 1 inner + * row will match a single outer row, we'll consider returning the + * first match, but after that we're done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -327,6 +328,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) nlstate); nlstate->js.jointype = node->join.jointype; + nlstate->js.unique_inner = node->join.unique_inner; nlstate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) nlstate); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index a737d7d..1ea346a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -639,6 +639,7 @@ CopyJoinFields(const Join *from, Join *newnode) CopyPlanFields((const Plan *) from, (Plan *) newnode); COPY_SCALAR_FIELD(jointype); + COPY_SCALAR_FIELD(unique_inner); COPY_NODE_FIELD(joinqual); } @@ -1938,6 +1939,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from) COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(lhs_strict); COPY_SCALAR_FIELD(delay_upper_joins); + COPY_SCALAR_FIELD(unique_inner); COPY_NODE_FIELD(join_quals); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 14e190a..f6672c6 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -796,6 +796,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b) COMPARE_SCALAR_FIELD(jointype); COMPARE_SCALAR_FIELD(lhs_strict); COMPARE_SCALAR_FIELD(delay_upper_joins); + COMPARE_SCALAR_FIELD(unique_inner); COMPARE_NODE_FIELD(join_quals); return true; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index e99d416..e764bda 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -32,12 +32,33 @@ #include "utils/lsyscache.h" /* local functions */ +static bool join_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo); static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + /* it may have already been marked by a previous call. */ + if (sjinfo->unique_inner) + continue; + + /* + * Determine if the inner side of the join can produce at most 1 row + * for any single outer row. + */ + sjinfo->unique_inner = join_is_unique_join(root, sjinfo); + } +} /* * remove_useless_joins @@ -91,6 +112,16 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* + * The removal of the join could have caused push down quals to + * have been removed, which could previously have stopped us + * from marking the join as a unique join. We'd better make + * another pass over each join to make sure otherwise we may fail + * to remove other joins which had a join condition on the join + * that we just removed. + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -135,36 +166,21 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, return false; /* no good for these input relations */ } -/* - * join_is_removable - * Check whether we need not perform this special join at all, because - * it will just duplicate its left input. - * - * This is true for a left join for which the join condition cannot match - * more than one inner-side row. (There are other possibly interesting - * cases, but we don't have the infrastructure to prove them.) We also - * have to check that the inner side doesn't generate any variables needed - * above the join. - */ static bool -join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +join_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; ListCell *l; - int attroff; + List *clause_list = NIL; - /* - * Must be a non-delaying left join to a single baserel, else we aren't - * going to be able to do anything with it. - */ - if (sjinfo->jointype != JOIN_LEFT || - sjinfo->delay_upper_joins) + /* if there's any delayed upper joins then we'll need to punt. */ + if (sjinfo->delay_upper_joins) return false; + /* if there's more than 1 relation involved then punt */ if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) return false; @@ -174,9 +190,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. */ if (innerrel->rtekind == RTE_RELATION) { @@ -207,53 +223,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); /* - * We can't remove the join if any inner-rel attributes are used above the - * join. - * - * Note that this test only detects use of inner-rel attributes in higher - * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. - * - * As a micro-optimization, it seems better to start with max_attr and - * count down rather than starting with min_attr and counting up, on the - * theory that the system attributes are somewhat less likely to be wanted - * and should be tested last. - */ - for (attroff = innerrel->max_attr - innerrel->min_attr; - attroff >= 0; - attroff--) - { - if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) - return false; - } - - /* - * Similarly check that the inner rel isn't needed by any PlaceHolderVars - * that will be used above the join. We only need to fail if such a PHV - * actually references some inner-rel attributes; but the correct check - * for that is relatively expensive, so we first check against ph_eval_at, - * which must mention the inner rel if the PHV uses any inner-rel attrs as - * non-lateral references. Note that if the PHV's syntactic scope is just - * the inner rel, we can't drop the rel even if the PHV is variable-free. - */ - foreach(l, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - - if (bms_is_subset(phinfo->ph_needed, joinrelids)) - continue; /* PHV is not used above the join */ - if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) - return false; /* it references innerrel laterally */ - if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) - continue; /* it definitely doesn't reference innerrel */ - if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) - return false; /* there isn't any other place to eval PHV */ - if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), - innerrel->relids)) - return false; /* it does reference innerrel */ - } - - /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is * mergejoinable then it behaves like equality for some btree opclass, so @@ -368,6 +337,94 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * join_is_removable + * Check whether we need not perform this special join at all, because + * it will just duplicate its left input. + * + * This is true for a left join for which the join condition cannot match + * more than one inner-side row. (There are other possibly interesting + * cases, but we don't have the infrastructure to prove them.) We also + * have to check that the inner side doesn't generate any variables needed + * above the join. + */ +static bool +join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + int attroff; + ListCell *l; + + /* + * We can only remove LEFT JOINs where the inner side is unique on the + * join condition. + */ + if (sjinfo->jointype != JOIN_LEFT || + sjinfo->unique_inner == false) + return false; + + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + Assert(innerrel->reloptkind == RELOPT_BASEREL); + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * We can't remove the join if any inner-rel attributes are used above the + * join. + * + * Note that this test only detects use of inner-rel attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. We check that case below. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. + */ + for (attroff = innerrel->max_attr - innerrel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) + return false; + } + + /* + * Similarly check that the inner rel isn't needed by any PlaceHolderVars + * that will be used above the join. We only need to fail if such a PHV + * actually references some inner-rel attributes; but the correct check + * for that is relatively expensive, so we first check against ph_eval_at, + * which must mention the inner rel if the PHV uses any inner-rel attrs as + * non-lateral references. Note that if the PHV's syntactic scope is just + * the inner rel, we can't drop the rel even if the PHV is variable-free. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(phinfo->ph_needed, joinrelids)) + continue; /* PHV is not used above the join */ + if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) + return false; /* it references innerrel laterally */ + if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) + continue; /* it definitely doesn't reference innerrel */ + if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) + return false; /* there isn't any other place to eval PHV */ + if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), + innerrel->relids)) + return false; /* it does reference innerrel */ + } + + return true; +} + /* * Remove the target relid from the planner's data structures, having diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8f9ae4f..00c746c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -132,12 +132,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -152,7 +152,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst, @@ -2189,7 +2189,8 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path->jointype, + best_path->unique_inner); copy_path_costsize(&join_plan->join.plan, &best_path->path); @@ -2483,7 +2484,8 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.unique_inner); /* Costs of sort and material steps are included in path cost already */ copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -2609,7 +2611,8 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.unique_inner); copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -3714,7 +3717,8 @@ make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -3725,6 +3729,7 @@ make_nestloop(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; node->nestParams = nestParams; @@ -3738,7 +3743,8 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -3750,6 +3756,7 @@ make_hashjoin(List *tlist, plan->righttree = righttree; node->hashclauses = hashclauses; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; return node; @@ -3798,7 +3805,8 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -3814,6 +3822,7 @@ make_mergejoin(List *tlist, node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; return node; diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 93484a0..b794d8e 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -175,6 +175,12 @@ query_planner(PlannerInfo *root, List *tlist, fix_placeholder_input_needed_levels(root); /* + * Analyze all joins and mark which ones can produce at most 1 row + * for each outer row. + */ + mark_unique_joins(root, joinlist); + + /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available * until we've built baserel data structures and classified qual clauses. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 319e8b2..e6bb11f 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1722,6 +1722,7 @@ create_nestloop_path(PlannerInfo *root, &restrict_clauses); pathnode->path.pathkeys = pathkeys; pathnode->jointype = jointype; + pathnode->unique_inner = sjinfo->unique_inner; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; @@ -1779,6 +1780,7 @@ create_mergejoin_path(PlannerInfo *root, &restrict_clauses); pathnode->jpath.path.pathkeys = pathkeys; pathnode->jpath.jointype = jointype; + pathnode->jpath.unique_inner = sjinfo->unique_inner; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; @@ -1847,6 +1849,7 @@ create_hashjoin_path(PlannerInfo *root, */ pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.jointype = jointype; + pathnode->jpath.unique_inner = sjinfo->unique_inner; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 41b13b2..c0192b7 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1560,6 +1560,7 @@ typedef struct JoinState { PlanState ps; JoinType jointype; + bool unique_inner; /* Inner side will match 0 or 1 outer rows */ List *joinqual; /* JOIN quals (in addition to ps.qual) */ } JoinState; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 48203a0..61a1898 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -524,9 +524,12 @@ typedef struct CustomScan /* ---------------- * Join node * - * jointype: rule for joining tuples from left and right subtrees - * joinqual: qual conditions that came from JOIN/ON or JOIN/USING - * (plan.qual contains conditions that came from WHERE) + * jointype: rule for joining tuples from left and right subtrees + * unique_inner: true if the planner discovered that each inner tuple + * can only match at most 1 outer tuple. Proofs include + * UNIQUE indexes and GROUP BY/DISTINCT clauses etc. + * joinqual: qual conditions that came from JOIN/ON or JOIN/USING + * (plan.qual contains conditions that came from WHERE) * * When jointype is INNER, joinqual and plan.qual are semantically * interchangeable. For OUTER jointypes, the two are *not* interchangeable; @@ -541,6 +544,7 @@ typedef struct Join { Plan plan; JoinType jointype; + bool unique_inner; /* Inner side will match 0 or 1 outer rows */ List *joinqual; /* JOIN quals (in addition to plan.qual) */ } Join; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 7116496..da28b46 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1028,6 +1028,8 @@ typedef struct JoinPath JoinType jointype; + bool unique_inner; /* inner side of join is unique */ + Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ @@ -1404,6 +1406,7 @@ typedef struct SpecialJoinInfo JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ + bool unique_inner; /* inner side is unique on join condition */ List *join_quals; /* join quals, in implicit-AND list format */ } SpecialJoinInfo; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 3fdc2cb..c4710de 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -121,6 +121,7 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);