diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7bf67a0529..353d385a0e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2346,12 +2346,10 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || - extra->inner_unique) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { /* - * With a SEMI or ANTI join, or if the innerrel is known unique, the - * executor will stop after the first match. + * SEMI or ANTI join: executor will stop after first match. * * Getting decent estimates requires inspection of the join quals, * which we choose to postpone to final_cost_nestloop. @@ -2432,12 +2430,10 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI || - extra->inner_unique) + if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) { /* - * With a SEMI or ANTI join, or if the innerrel is known unique, the - * executor will stop after the first match. + * SEMI or ANTI join: executor will stop after first match. */ Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; @@ -2932,7 +2928,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * The whole issue is moot if we are working from a unique-ified outer * input, or if we know we don't need to mark/restore at all. */ - if (IsA(outer_path, UniquePath) ||path->skip_mark_restore) + if (IsA(outer_path, UniquePath) || path->skip_mark_restore) rescannedtuples = 0; else { @@ -3410,16 +3406,13 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || - path->jpath.jointype == JOIN_ANTI || - extra->inner_unique) + if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * With a SEMI or ANTI join, or if the innerrel is known unique, the - * executor will stop after the first match. + * SEMI or ANTI join: executor will stop after first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the @@ -4008,12 +4001,11 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, /* * compute_semi_anti_join_factors - * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join + * Estimate how much of the inner input a SEMI or ANTI join * can be expected to scan. * * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning * inner rows as soon as it finds a match to the current outer row. - * The same happens if we have detected the inner rel is unique. * We should therefore adjust some of the cost components for this effect. * This function computes some estimates needed for these adjustments. * These estimates will be the same regardless of the particular paths used @@ -4024,7 +4016,7 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, * joinrel: join relation under consideration * outerrel: outer relation under consideration * innerrel: inner relation under consideration - * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique + * jointype: must be JOIN_SEMI or JOIN_ANTI * sjinfo: SpecialJoinInfo relevant to this join * restrictlist: join quals * Output parameters: @@ -4047,14 +4039,16 @@ compute_semi_anti_join_factors(PlannerInfo *root, List *joinquals; ListCell *l; + /* Should only be called in these cases */ + Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI); + /* * In an ANTI join, we must ignore clauses that are "pushed down", since * those won't affect the match logic. In a SEMI join, we do not * distinguish joinquals from "pushed down" quals, so just use the whole - * restrictinfo list. For other outer join types, we should consider only - * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check. + * restrictinfo list. */ - if (IS_OUTER_JOIN(jointype)) + if (jointype == JOIN_ANTI) { joinquals = NIL; foreach(l, restrictlist) @@ -4074,7 +4068,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, jselec = clauselist_selectivity(root, joinquals, 0, - (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI, + jointype, sjinfo); /* @@ -4101,7 +4095,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, &norm_sjinfo); /* Avoid leaking a lot of ListCells */ - if (IS_OUTER_JOIN(jointype)) + if (jointype == JOIN_ANTI) list_free(joinquals); /* diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 642f951093..cd618ed6cd 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -205,10 +205,10 @@ add_paths_to_joinrel(PlannerInfo *root, &mergejoin_allowed); /* - * If it's SEMI, ANTI, or inner_unique join, compute correction factors - * for cost estimation. These will be the same for all paths. + * If it's SEMI or ANTI join, compute correction factors for cost + * estimation. These will be the same for all paths. */ - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel, jointype, sjinfo, restrictlist, &extra.semifactors); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 717e965f30..1b6ed8f7e4 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1110,31 +1110,29 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate + QUERY PLAN +------------------------------------------------------- + Group Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + -> Merge Join + Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) + -> Index Scan using t1_pkey on t1 + -> Index Scan using t2_pkey on t2 +(6 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + -> Merge Join + Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) + -> Index Scan using t1_pkey on t1 + -> Index Scan using t2_pkey on t2 +(6 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by a,b,c; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index dc6262be43..306b096794 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5657,15 +5657,14 @@ select * from j1 inner join j3 on j1.id = j3.id; ----------------------------------- Hash Join Output: j1.id, j3.id - Inner Unique: true - Hash Cond: (j3.id = j1.id) - -> Seq Scan on public.j3 - Output: j3.id - -> Hash + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 Output: j1.id - -> Seq Scan on public.j1 - Output: j1.id -(10 rows) + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(9 rows) -- ensure left join is marked as unique explain (verbose, costs off)