diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 0a2562c149..00f80e4f34 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2809,6 +2809,96 @@ cost_agg(Path *path, PlannerInfo *root, path->total_cost = total_cost; } +/* + * get_windowclause_startup_tuples + * Determine how many tuples we'll need to fetch from a WindowAgg's + * subnode before we can output the first WindowAgg tuple. + */ +static double +get_windowclause_startup_tuples(PlannerInfo *root, WindowClause *wc, + double input_tuples) +{ + double partition_tuples; + + /* Figure out how many partitions there are likely to be */ + if (wc->partitionClause != NIL) + { + double num_partitions; + List *partexprs = get_sortgrouplist_exprs(wc->partitionClause, + root->parse->targetList); + + num_partitions = estimate_num_groups(root, partexprs, input_tuples, + NULL, NULL); + list_free(partexprs); + + partition_tuples = input_tuples / num_partitions; + } + else + { + /* all tuples belong to the same partition */ + partition_tuples = input_tuples; + } + + /* + * An empty ORDER BY means all rows are peers and WindowAgg will process + * the entire partition for the first tuple. + */ + if (wc->orderClause == NIL) + return clamp_row_est(partition_tuples); + + /* + * BETWEEN ... AND CURRENT ROW means the executor only needs to read at + * most 1 row from the subnode before outputting the first tuple. + */ + if (wc->frameOptions & FRAMEOPTION_END_CURRENT_ROW) + return 1.0; + + /* + * BETWEEN ... AND UNBOUNDED FOLLOWING means we need to read the entire + * partition before outputting the first tuple. + */ + if (wc->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + return clamp_row_est(partition_tuples); + + /* + * BETWEEN ... AND N PRECEDING does not need to read from the subnode for + * the first N tuples. To be safe, we return 1.0. The endOffset could be + * 0. + */ + if (wc->frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) + return 1.0; + + /* + * BETWEEN ... AND N FOLLOWING is more complex. We need to try to + * determine the value of endOffset. + */ + if (wc->frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING && + IsA(wc->endOffset, Const)) + { + Const *endOffset = (Const *) wc->endOffset; + + if (endOffset->constisnull) + { + /* + * This should raise an ERROR during execution, but we'd better do + * something sane here. + */ + return 1.0; + } + + /* XXX do we expect any other type of Const? */ + else if (endOffset->consttype == INT4OID) + return clamp_row_est(Min(partition_tuples, + (double) DatumGetInt32(endOffset->constvalue))); + else if (endOffset->consttype == INT8OID) + return clamp_row_est(Min(partition_tuples, + (double) DatumGetInt64(endOffset->constvalue))); + } + + /* Just assume we'll need DEFAULT_INEQ_SEL of the partition's tuples */ + return clamp_row_est(partition_tuples * DEFAULT_INEQ_SEL); +} + /* * cost_windowagg * Determines and returns the cost of performing a WindowAgg plan node, @@ -2818,17 +2908,34 @@ cost_agg(Path *path, PlannerInfo *root, */ void cost_windowagg(Path *path, PlannerInfo *root, - List *windowFuncs, int numPartCols, int numOrderCols, + List *windowFuncs, WindowClause *winclause, Cost input_startup_cost, Cost input_total_cost, double input_tuples) { Cost startup_cost; Cost total_cost; + double startup_tuples; + int numPartCols; + int numOrderCols; ListCell *lc; + numPartCols = list_length(winclause->partitionClause); + numOrderCols = list_length(winclause->orderClause); + startup_cost = input_startup_cost; total_cost = input_total_cost; + /* + * Estimate how many tuples we'll need to read from the subnode before we + * can output the first WindowAgg row. + * + * XXX is it worth caching this value in the WindowClause to prevent + * having to calculate it for each path we generate? The number of + * input_tuples could vary from path to path, however. + */ + startup_tuples = get_windowclause_startup_tuples(root, winclause, + input_tuples); + /* * Window functions are assumed to cost their stated execution cost, plus * the cost of evaluating their input expressions, per tuple. Since they @@ -2880,6 +2987,18 @@ cost_windowagg(Path *path, PlannerInfo *root, path->rows = input_tuples; path->startup_cost = startup_cost; path->total_cost = total_cost; + + /* + * Also take into account how many tuples we need to read from the subnode + * in order to produce the first tuple from the WindowAgg. To do this we + * proportion the run cost (total cost not including startup costs) over + * the estimated startup tuples. We already included the startup cost of + * the subnode, so we only need to do this when the estimated startup + * tuples is above 1.0. + */ + if (startup_tuples > 1.0) + path->startup_cost += (total_cost - startup_cost) / input_tuples * + (startup_tuples - 1.0); } /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 65a191ebfd..8c4dbff00a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3445,8 +3445,7 @@ create_windowagg_path(PlannerInfo *root, */ cost_windowagg(&pathnode->path, root, windowFuncs, - list_length(winclause->partitionClause), - list_length(winclause->orderClause), + winclause, subpath->startup_cost, subpath->total_cost, subpath->rows); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 6cf49705d3..bee090ffc2 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -131,7 +131,7 @@ extern void cost_agg(Path *path, PlannerInfo *root, Cost input_startup_cost, Cost input_total_cost, double input_tuples, double input_width); extern void cost_windowagg(Path *path, PlannerInfo *root, - List *windowFuncs, int numPartCols, int numOrderCols, + List *windowFuncs, WindowClause *winclause, Cost input_startup_cost, Cost input_total_cost, double input_tuples); extern void cost_group(Path *path, PlannerInfo *root, diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 1d4b78b9b2..2628033327 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -4808,6 +4808,80 @@ SELECT i, b, bool_and(b) OVER w, bool_or(b) OVER w 5 | t | t | t (5 rows) +-- +-- Test WindowAgg costing takes into account the number of rows that need to +-- be fetched before the first row can be output. +-- +-- Ensure we get a cheap start up plan as the WindowAgg can output the first +-- row after reading 1 row from the join. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + QUERY PLAN +-------------------------------------------------------------------------- + Limit + -> WindowAgg + -> Nested Loop + -> Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 + Index Cond: (tenthous = t1.unique1) +(6 rows) + +-- Ensure we get a cheap total plan. Lack of ORDER BY in the WindowClause +-- means that all rows must be read from the join, so a cheap startup plan +-- isn't a good choice. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER () +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Limit + -> WindowAgg + -> Hash Join + Hash Cond: (t1.unique1 = t2.tenthous) + -> Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Hash + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 +(7 rows) + +-- Ensure we get a cheap total plan. This time use UNBOUNDED FOLLOWING, which +-- needs to read all join rows to output the first WindowAgg row. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Limit + -> WindowAgg + -> Merge Join + Merge Cond: (t1.unique1 = t2.tenthous) + -> Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Sort + Sort Key: t2.tenthous + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 +(8 rows) + +-- Ensure we get a cheap total plan. This time use 10000 FOLLOWING so we need +-- to read all join rows. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND 10000 FOLLOWING) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Limit + -> WindowAgg + -> Merge Join + Merge Cond: (t1.unique1 = t2.tenthous) + -> Index Only Scan using tenk1_unique1 on tenk1 t1 + -> Sort + Sort Key: t2.tenthous + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2 +(8 rows) + -- Tests for problems with failure to walk or mutate expressions -- within window frame clauses. -- test walker (fails with collation error if expressions are not walked) diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 3ab6ac715d..4789de0937 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -1716,6 +1716,40 @@ SELECT i, b, bool_and(b) OVER w, bool_or(b) OVER w FROM (VALUES (1,true), (2,true), (3,false), (4,false), (5,true)) v(i,b) WINDOW w AS (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING); +-- +-- Test WindowAgg costing takes into account the number of rows that need to +-- be fetched before the first row can be output. +-- + +-- Ensure we get a cheap start up plan as the WindowAgg can output the first +-- row after reading 1 row from the join. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + +-- Ensure we get a cheap total plan. Lack of ORDER BY in the WindowClause +-- means that all rows must be read from the join, so a cheap startup plan +-- isn't a good choice. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER () +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + +-- Ensure we get a cheap total plan. This time use UNBOUNDED FOLLOWING, which +-- needs to read all join rows to output the first WindowAgg row. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + +-- Ensure we get a cheap total plan. This time use 10000 FOLLOWING so we need +-- to read all join rows. +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND 10000 FOLLOWING) +FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.tenthous +LIMIT 1; + -- Tests for problems with failure to walk or mutate expressions -- within window frame clauses.