diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index fdaa964..d70d88d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1673,10 +1673,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * 1. If the subquery has a LIMIT clause, we must not push down any quals, * since that could change the set of rows returned. * - * 2. If the subquery contains any window functions, we can't push quals - * into it, because that could change the results. - * - * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push + * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * quals into it, because that could change the results. * * In addition, we make several checks on the subquery's output columns @@ -1699,10 +1696,6 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->limitOffset != NULL || subquery->limitCount != NULL) return false; - /* Check point 2 */ - if (subquery->hasWindowFuncs) - return false; - /* * If we're at a leaf query, check for unsafe expressions in its target * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in @@ -1798,6 +1791,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, * operator sees as equal. This is a bit shaky but we have no way to test * for the case, and it's unlikely enough that we shouldn't refuse the * optimization just because it could theoretically happen.) + * + * 4. If the subquery has windowing functions we are able to push down any + * quals that are referenced in all of the subquery's window PARTITION BY + * clauses. This is permitted as window partitions are completed isolated + * from each other and removing records from unneeded partitions early has + * no affect on the query results. */ static void check_output_expressions(Query *subquery, bool *unsafeColumns) @@ -1837,6 +1836,19 @@ check_output_expressions(Query *subquery, bool *unsafeColumns) unsafeColumns[tle->resno] = true; continue; } + + /* Check if all partition by clauses, check point 4 */ + if (subquery->hasWindowFuncs && + !targetExistsInAllQueryPartitionByClauses(tle, subquery)) + { + /* + * Not all the window partition by clauses contain this column + * so mark it as unsafe + */ + unsafeColumns[tle->resno] = true; + continue; + } + } } diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index fcee137..c2c8f52 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2449,6 +2449,26 @@ targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList) } /* + * targetExistsInAllQueryPartitionByClauses + * returns true if the given Var exists in the PARTITION BY clause in every + * WINDOW defined in the subquery + */ +bool +targetExistsInAllQueryPartitionByClauses(TargetEntry *tle, Query *query) +{ + ListCell *lc; + + foreach(lc, query->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + + if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause)) + return false; + } + return true; +} + +/* * findWindowClause * Find the named WindowClause in the list, or return NULL if not there */ diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index e9e7cdc..3fecd69 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -47,5 +47,7 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); +extern bool targetExistsInAllQueryPartitionByClauses(TargetEntry *tle, + Query *query); #endif /* PARSE_CLAUSE_H */ diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index c2cc742..9f93593 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -18,6 +18,152 @@ INSERT INTO empsalary VALUES ('sales', 3, 4800, '2007-08-01'), ('develop', 8, 6000, '2006-10-01'), ('develop', 11, 5200, '2007-08-15'); +-- Test that push down of clauses into the sub query works as expected. +-- depname is in the partition clause, so pushdown is safe. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER (PARTITION BY depname) depsalary FROM empsalary) emp WHERE depname = 'sales'; + QUERY PLAN +--------------------------------------------------- + WindowAgg + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(3 rows) + +-- depname not in partition clause, don't pushdown into subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER (PARTITION BY enroll_date) depsalary FROM empsalary) emp WHERE depname = 'sales'; + QUERY PLAN +------------------------------------------------- + Subquery Scan on emp + Filter: ((emp.depname)::text = 'sales'::text) + -> WindowAgg + -> Sort + Sort Key: empsalary.enroll_date + -> Seq Scan on empsalary +(6 rows) + +-- sub query contains a partition by clause that does not have depname, don't push down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY enroll_date) enroll_salary, + min(salary) OVER (PARTITION BY depname) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +----------------------------------------------------------- + Subquery Scan on emp + Filter: ((emp.depname)::text = 'sales'::text) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname + -> WindowAgg + -> Sort + Sort Key: empsalary.enroll_date + -> Seq Scan on empsalary +(9 rows) + +-- all sub queries contain depname, so it can be pushed down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname,depname || 'B') depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Subquery Scan on emp + -> WindowAgg + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)), (((empsalary.depname)::text || 'B'::text)) + -> WindowAgg + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(7 rows) + +-- all sub queries contain depname, so it can be pushed down. (similar to above but columns in different order) +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname || 'B',depname) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Subquery Scan on emp + -> WindowAgg + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)), (((empsalary.depname)::text || 'B'::text)) + -> WindowAgg + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(7 rows) + +-- one sub query does not contain depname, so it cannot be pushed down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname || 'B') depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Subquery Scan on emp + Filter: ((emp.depname)::text = 'sales'::text) + -> WindowAgg + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)), (((empsalary.depname)::text || 'B'::text)) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname + -> Seq Scan on empsalary +(9 rows) + +-- empty window clause, so cannot push outer where into subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER () depsalary FROM empsalary) emp WHERE depname = 'sales'; + QUERY PLAN +------------------------------------------------- + Subquery Scan on emp + Filter: ((emp.depname)::text = 'sales'::text) + -> WindowAgg + -> Seq Scan on empsalary +(4 rows) + +-- both window clauses contain depname, so we can push down the clause into the subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary1, + rank() OVER (PARTITION BY depname ORDER BY enroll_date) enroll_rank + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +--------------------------------------------------------------------- + Subquery Scan on emp + -> WindowAgg + -> Sort + Sort Key: empsalary.enroll_date + -> WindowAgg + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(7 rows) + +-- an unused window currently stops the pushdown from happening. (This could probably be optimized). +EXPLAIN (COSTS OFF) +SELECT depname,depsalary FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + rank() OVER (ORDER BY enroll_date) emprank + FROM empsalary) emp +WHERE depname = 'sales'; + QUERY PLAN +------------------------------------------------- + Subquery Scan on emp + Filter: ((emp.depname)::text = 'sales'::text) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname + -> Seq Scan on empsalary +(6 rows) + SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; depname | empno | salary | sum -----------+-------+--------+------- diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 31c98eb..c2d1c60 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -21,6 +21,68 @@ INSERT INTO empsalary VALUES ('develop', 8, 6000, '2006-10-01'), ('develop', 11, 5200, '2007-08-15'); +-- Test that push down of clauses into the sub query works as expected. + +-- depname is in the partition clause, so pushdown is safe. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER (PARTITION BY depname) depsalary FROM empsalary) emp WHERE depname = 'sales'; + +-- depname not in partition clause, don't pushdown into subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER (PARTITION BY enroll_date) depsalary FROM empsalary) emp WHERE depname = 'sales'; + +-- sub query contains a partition by clause that does not have depname, don't push down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY enroll_date) enroll_salary, + min(salary) OVER (PARTITION BY depname) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- all sub queries contain depname, so it can be pushed down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname,depname || 'B') depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- all sub queries contain depname, so it can be pushed down. (similar to above but columns in different order) +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname || 'B',depname) depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- one sub query does not contain depname, so it cannot be pushed down. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname || 'A',depname || 'B') depminsalary + FROM empsalary) emp +WHERE depname = 'sales'; + +-- empty window clause, so cannot push outer where into subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname,sum(salary) OVER () depsalary FROM empsalary) emp WHERE depname = 'sales'; + +-- both window clauses contain depname, so we can push down the clause into the subquery. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary1, + rank() OVER (PARTITION BY depname ORDER BY enroll_date) enroll_rank + FROM empsalary) emp +WHERE depname = 'sales'; + +-- an unused window currently stops the pushdown from happening. (This could probably be optimized). +EXPLAIN (COSTS OFF) +SELECT depname,depsalary FROM (SELECT depname, + sum(salary) OVER (PARTITION BY depname) depsalary, + rank() OVER (ORDER BY enroll_date) emprank + FROM empsalary) emp +WHERE depname = 'sales'; + SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary;