diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d396ef1..3630469 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3439,9 +3439,12 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, reldistinct = clamp; /* - * Multiply by restriction selectivity. + * Estimate the number of groups observed in the expected number of + * rows, using a formula for selection without replacement, assuming + * uniform distribution. */ - reldistinct *= rel->rows / rel->tuples; + reldistinct *= (1 - powl(1 - rel->rows / rel->tuples, + rel->tuples / reldistinct)); /* * Update estimate of total distinct groups. diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index de64ca7..0fc93d9 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -807,27 +807,24 @@ select * from int4_tbl where explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); - QUERY PLAN ----------------------------------------------------------------------- - Hash Join + QUERY PLAN +---------------------------------------------------------------- + Hash Semi Join Output: o.f1 Hash Cond: (o.f1 = "ANY_subquery".f1) -> Seq Scan on public.int4_tbl o Output: o.f1 -> Hash Output: "ANY_subquery".f1, "ANY_subquery".g - -> HashAggregate + -> Subquery Scan on "ANY_subquery" Output: "ANY_subquery".f1, "ANY_subquery".g - Group Key: "ANY_subquery".f1, "ANY_subquery".g - -> Subquery Scan on "ANY_subquery" - Output: "ANY_subquery".f1, "ANY_subquery".g - Filter: ("ANY_subquery".f1 = "ANY_subquery".g) - -> HashAggregate - Output: i.f1, (generate_series(1, 2) / 10) - Group Key: i.f1 - -> Seq Scan on public.int4_tbl i - Output: i.f1 -(18 rows) + Filter: ("ANY_subquery".f1 = "ANY_subquery".g) + -> HashAggregate + Output: i.f1, (generate_series(1, 2) / 10) + Group Key: i.f1 + -> Seq Scan on public.int4_tbl i + Output: i.f1 +(15 rows) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);