From 90c4adb2db083718e5f2ac89b691120463241369 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Tue, 12 Jun 2018 14:13:48 +0200 Subject: [PATCH] Order windows on partition/ordering prefix to reuse Sort nodes By ordering the Windows on partitioning/ordering prefix, the result from Sort nodes can be reused by multiple WindowAgg nodes and thus we can avoid Sort nodes. --- src/backend/optimizer/plan/planner.c | 91 +++++++++++++++++++++++++----------- src/test/regress/expected/window.out | 60 ++++++++++++++++++------ src/test/regress/sql/window.sql | 16 +++++++ 3 files changed, 126 insertions(+), 41 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fd45c9767d..11f7c81099 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -110,6 +110,12 @@ typedef struct int *tleref_to_colnum_map; } grouping_sets_data; +typedef struct +{ + WindowClause *wc; + List *uniqueOrder; +} WindowClauseSortNode; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); @@ -237,6 +243,8 @@ static void create_partitionwise_grouping_paths(PlannerInfo *root, static bool group_by_has_partkey(RelOptInfo *input_rel, List *targetList, List *groupClause); +static int common_prefix_cmp(const void *a, const void *b); + /***************************************************************************** @@ -5268,7 +5276,19 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) /* It's only active if wflists shows some related WindowFuncs */ Assert(wc->winref <= wflists->maxWinRef); if (wflists->windowFuncs[wc->winref] != NIL) - actives = lappend(actives, wc); + { + WindowClauseSortNode *wcs = palloc(sizeof(WindowClauseSortNode)); + + wcs->wc = wc; + /* + * Generating the uniqueOrder can be offloaded to the comparison + * function to optimize for the case where we only have a single + * window. For now, optimize for readibility. + */ + wcs->uniqueOrder = list_concat_unique( + list_copy(wc->partitionClause), wc->orderClause); + actives = lappend(actives, wcs); + } } /* @@ -5277,45 +5297,60 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) * says that only one sort is to be used for such windows, even if they * are otherwise distinct (eg, different names or framing clauses). * - * There is room to be much smarter here, for example detecting whether - * one window's sort keys are a prefix of another's (so that sorting for - * the latter would do for the former), or putting windows first that - * match a sort order available for the underlying query. For the moment - * we are content with meeting the spec. + * Also sort windows by partitioning/ordering prefixes where the sorting + * required for a window will cover the sorting required for another + * window, thus reducing the number of Sort nodes. If we only have a + * single window then skip the sorting. */ + if (list_length(actives) > 1) + actives = list_qsort(actives, common_prefix_cmp); + result = NIL; while (actives != NIL) { - WindowClause *wc = linitial_node(WindowClause, actives); - ListCell *prev; - ListCell *next; + WindowClauseSortNode *wcs = (WindowClauseSortNode *) linitial(actives); - /* Move wc from actives to result */ actives = list_delete_first(actives); - result = lappend(result, wc); - - /* Now move any matching windows from actives to result */ - prev = NULL; - for (lc = list_head(actives); lc; lc = next) - { - WindowClause *wc2 = lfirst_node(WindowClause, lc); + result = lappend(result, wcs->wc); - next = lnext(lc); - /* framing options are NOT to be compared here! */ - if (equal(wc->partitionClause, wc2->partitionClause) && - equal(wc->orderClause, wc2->orderClause)) - { - actives = list_delete_cell(actives, lc, prev); - result = lappend(result, wc2); - } - else - prev = lc; - } + list_free(wcs->uniqueOrder); + pfree(wcs); } return result; } +static int +common_prefix_cmp(const void *a, const void *b) +{ + WindowClauseSortNode *wcsa = (WindowClauseSortNode *) lfirst(*(ListCell **) a); + WindowClauseSortNode *wcsb = (WindowClauseSortNode *) lfirst(*(ListCell **) b); + ListCell *item_a; + ListCell *item_b; + + forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder) + { + SortGroupClause *sca = lfirst(item_a); + SortGroupClause *scb = lfirst(item_b); + + if (sca->tleSortGroupRef > scb->tleSortGroupRef) + return -1; + else if (sca->tleSortGroupRef < scb->tleSortGroupRef) + return 1; + else if (sca->sortop > scb->sortop) + return -1; + else if (sca->sortop < scb->sortop) + return 1; + } + + if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder)) + return -1; + else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder)) + return 1; + + return 0; +} + /* * make_window_input_target * Generate appropriate PathTarget for initial input to WindowAgg nodes. diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 85d81e7c9f..eb67a77c85 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -504,9 +504,9 @@ SELECT sum(salary), FROM empsalary GROUP BY depname; sum | row_number | sum -------+------------+------- - 14600 | 3 | 14600 - 7400 | 2 | 22000 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 (3 rows) -- identical windows with different names @@ -2899,9 +2899,9 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum( FROM empsalary GROUP BY depname; sum | row_number | filtered_sum | depname -------+------------+--------------+----------- - 14600 | 3 | | sales - 7400 | 2 | 3500 | personnel 25100 | 1 | 22600 | develop + 7400 | 2 | 3500 | personnel + 14600 | 3 | | sales (3 rows) -- Test pushdown of quals into a subquery containing window functions @@ -2913,13 +2913,13 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN ---------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------- Subquery Scan on emp -> WindowAgg - -> Sort - Sort Key: (((empsalary.depname)::text || 'A'::text)) - -> WindowAgg + -> WindowAgg + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)) -> Seq Scan on empsalary Filter: ((depname)::text = 'sales'::text) (7 rows) @@ -2932,19 +2932,53 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------- Subquery Scan on emp Filter: ((emp.depname)::text = 'sales'::text) -> WindowAgg -> Sort - Sort Key: empsalary.depname + Sort Key: empsalary.enroll_date -> WindowAgg -> Sort - Sort Key: empsalary.enroll_date + Sort Key: empsalary.depname -> Seq Scan on empsalary (9 rows) +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +---------------------------------------------------------------------- + Subquery Scan on emp + -> WindowAgg + -> WindowAgg + -> Sort + Sort Key: empsalary.empno, empsalary.enroll_date + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(7 rows) + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) + from empsalary; + QUERY PLAN +------------------------------------------------------------- + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, salary, enroll_date, empno + -> Seq Scan on empsalary +(5 rows) + -- cleanup DROP TABLE empsalary; -- test user-defined window function with named args and default args diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 051b50b2d3..2809aa991b 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -854,6 +854,22 @@ SELECT * FROM FROM empsalary) emp WHERE depname = 'sales'; +-- Test Sort node collapsing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- Test Sort node reordering +EXPLAIN (COSTS OFF) +SELECT + lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), + lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) + from empsalary; + -- cleanup DROP TABLE empsalary; -- 2.14.1.145.gb3622a4ee