diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 10644dfac4..690ba88e7e 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1979,6 +1979,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); break; + case T_WindowAgg: + show_upper_qual(((WindowAgg *) plan)->runconditionorig, + "Run Condition", planstate, ancestors, es); + break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index f8ea9e96d8..ad5fcdd13b 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2023,6 +2023,7 @@ static TupleTableSlot * ExecWindowAgg(PlanState *pstate) { WindowAggState *winstate = castNode(WindowAggState, pstate); + TupleTableSlot *slot; ExprContext *econtext; int i; int numfuncs; @@ -2235,7 +2236,20 @@ ExecWindowAgg(PlanState *pstate) */ econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot; - return ExecProject(winstate->ss.ps.ps_ProjInfo); + slot = ExecProject(winstate->ss.ps.ps_ProjInfo); + + /* + * Now evaluate the run condition to see if we need to continue further + * with execution. + */ + econtext->ecxt_scantuple = slot; + if (!ExecQual(winstate->runcondition, econtext)) + { + winstate->all_done = true; + return NULL; + } + + return slot; } /* ----------------- @@ -2307,6 +2321,18 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) Assert(node->plan.qual == NIL); winstate->ss.ps.qual = NULL; + /* + * Setup the run condition, if we received one from the query planner. + * When set, this can allow us to finish execution early because we know + * some higher-level filter exists that would just filter out any further + * results that we produce. + */ + if (node->runcondition != NIL) + winstate->runcondition = ExecInitQual(node->runcondition, + (PlanState *) winstate); + else + winstate->runcondition = NULL; + /* * initialize child nodes */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 38251c2b8e..a1876fe54c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1095,6 +1095,8 @@ _copyWindowAgg(const WindowAgg *from) COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); COPY_NODE_FIELD(endOffset); + COPY_NODE_FIELD(runcondition); + COPY_NODE_FIELD(runconditionorig); COPY_SCALAR_FIELD(startInRangeFunc); COPY_SCALAR_FIELD(endInRangeFunc); COPY_SCALAR_FIELD(inRangeColl); @@ -2571,6 +2573,8 @@ _copyWindowClause(const WindowClause *from) COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); COPY_NODE_FIELD(endOffset); + COPY_NODE_FIELD(runcondition); + COPY_NODE_FIELD(runconditionorig); COPY_SCALAR_FIELD(startInRangeFunc); COPY_SCALAR_FIELD(endInRangeFunc); COPY_SCALAR_FIELD(inRangeColl); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 8a1762000c..79136ec47d 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2834,6 +2834,8 @@ _equalWindowClause(const WindowClause *a, const WindowClause *b) COMPARE_SCALAR_FIELD(frameOptions); COMPARE_NODE_FIELD(startOffset); COMPARE_NODE_FIELD(endOffset); + COMPARE_NODE_FIELD(runcondition); + COMPARE_NODE_FIELD(runconditionorig); COMPARE_SCALAR_FIELD(startInRangeFunc); COMPARE_SCALAR_FIELD(endInRangeFunc); COMPARE_SCALAR_FIELD(inRangeColl); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 87561cbb6f..41b3d66fe3 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -819,6 +819,8 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); + WRITE_NODE_FIELD(runcondition); + WRITE_NODE_FIELD(runconditionorig); WRITE_OID_FIELD(startInRangeFunc); WRITE_OID_FIELD(endInRangeFunc); WRITE_OID_FIELD(inRangeColl); @@ -3142,6 +3144,8 @@ _outWindowClause(StringInfo str, const WindowClause *node) WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); + WRITE_NODE_FIELD(runcondition); + WRITE_NODE_FIELD(runconditionorig); WRITE_OID_FIELD(startInRangeFunc); WRITE_OID_FIELD(endInRangeFunc); WRITE_OID_FIELD(inRangeColl); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 0dd1ad7dfc..366d2ef43d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -384,6 +384,8 @@ _readWindowClause(void) READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); + READ_NODE_FIELD(runcondition); + READ_NODE_FIELD(runconditionorig); READ_OID_FIELD(startInRangeFunc); READ_OID_FIELD(endInRangeFunc); READ_OID_FIELD(inRangeColl); @@ -2346,6 +2348,8 @@ _readWindowAgg(void) READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); + READ_NODE_FIELD(runcondition); + READ_NODE_FIELD(runconditionorig); READ_OID_FIELD(startInRangeFunc); READ_OID_FIELD(endInRangeFunc); READ_OID_FIELD(inRangeColl); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 296dd75c1b..5c612e53e5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -27,6 +27,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif @@ -2090,6 +2091,377 @@ has_multiple_baserels(PlannerInfo *root) return false; } +/* + * find_window_run_conditions + * Call wfunc's prosupport function to ask if the given window function + * is monotonic and then see if 'opexpr' can be used to stop processing + * WindowAgg nodes early. + * + * For example row_number() over (order by ...) always produces a value one + * higher than the previous. If someone has a window function such as that + * in a subquery and just wants, say all rows with a row number less than or + * equal to 10, then we may as well stop processing the windowagg once the row + * number reaches 11. Here we look for opexprs that might help us to stop + * doing needless extra processing in WindowAgg nodes. + * + * To do this we make use of the window function's prosupport function to + * determine if the given window function with the given window clause is a + * monotonically increasing or decreasing function. + * + * '*keep_original' is set to true if the caller should also use 'opexpr' for + * its original purpose. This is set to false if the caller can assume that + * the run condition will handle all of the required filtering. + * + * Returns true if a run condition qual was found and added to the + * wclause->runcondition list and sets *keep_original accordingly. Returns + * false if unable to use 'opexpr' as a run condition and does not set + * *keep_original. + */ +static bool +find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, + AttrNumber attno, WindowClause *wclause, + WindowFunc *wfunc, OpExpr *opexpr, bool wfunc_left, + bool *keep_original) +{ + Oid prosupport; + Expr *otherexpr; + SupportRequestWFuncMonotonic req; + SupportRequestWFuncMonotonic *res; + List *opinfos; + OpExpr *runopexpr; + ListCell *lc; + + *keep_original = true; + + prosupport = get_func_support(wfunc->winfnoid); + + /* Check if there's a support function for 'wfunc' */ + if (!OidIsValid(prosupport)) + return false; + + /* + * Currently the WindowAgg node just stop when the run condition is no + * longer true. If there is a PARTITION BY clause then we cannot just + * stop as other partitions still need to be processed. + */ + if (wclause->partitionClause != NIL) + return false; + + /* get the Expr from the other side of the OpExpr */ + if (wfunc_left) + otherexpr = lsecond(opexpr->args); + else + otherexpr = linitial(opexpr->args); + + /* + * The value being compared must not change during the evaluation of the + * window partition. + */ + if (!is_pseudo_constant_clause((Node *) otherexpr)) + return false; + + req.type = T_SupportRequestWFuncMonotonic; + req.window_func = wfunc; + req.window_clause = wclause; + + /* Call the support function */ + res = (SupportRequestWFuncMonotonic *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + /* + * Nothing to do if the function is not monotonically increasing or + * decreasing. + */ + if (res == NULL || res->monotonic == MONOTONICFUNC_NONE) + return false; + + runopexpr = NULL; + opinfos = get_op_btree_interpretation(opexpr->opno); + + foreach(lc, opinfos) + { + OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc); + int strategy = opinfo->strategy; + + if (wfunc_left) + { + /* Handle < / <= */ + if (strategy == BTLessStrategyNumber || + strategy == BTLessEqualStrategyNumber) + { + /* + * If the frame is bound to the top of the window then the + * result cannot decrease. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle > / >= */ + else if (strategy == BTGreaterStrategyNumber || + strategy == BTGreaterEqualStrategyNumber) + { + /* + * If the frame is bound to the bottom of the window then the + * result cannot increase. + */ + if (res->monotonic & MONOTONICFUNC_DECREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle = */ + else if (strategy == BTEqualStrategyNumber) + { + OpExpr *newopexpr; + Oid op; + int16 newstrategy; + + /* + * When both monotonically increasing and decreasing then the + * return value of the window function will be the same each + * time. We can simply use 'opexpr' as the run condition + * without modifying it. + */ + if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH) + { + *keep_original = false; + runopexpr = opexpr; + break; + } + + /* + * When monotonically increasing we make a qual with + * <= in order to filter out values which are above + * the value in the equality condition. For monotonically + * decreasing we want to filter values below the value in the + * equality condition. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + newstrategy = BTLessEqualStrategyNumber; + else + newstrategy = BTGreaterEqualStrategyNumber; + + op = get_opfamily_member(opinfo->opfamily_id, + opinfo->oplefttype, + opinfo->oprighttype, + newstrategy); + + newopexpr = (OpExpr *) make_opclause(op, + opexpr->opresulttype, + opexpr->opretset, + (Expr *) wfunc, + otherexpr, + opexpr->opcollid, + opexpr->inputcollid); + newopexpr->opfuncid = get_opcode(op); + + *keep_original = true; + runopexpr = newopexpr; + break; + } + } + else + { + /* Handle > / >= */ + if (strategy == BTGreaterStrategyNumber || + strategy == BTGreaterEqualStrategyNumber) + { + /* + * If the frame is bound to the top of the window then the + * result cannot decrease. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle < / <= */ + else if (strategy == BTLessStrategyNumber || + strategy == BTLessEqualStrategyNumber) + { + /* + * If the frame is bound to the bottom of the window then the + * result cannot increase. + */ + if (res->monotonic & MONOTONICFUNC_DECREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle = */ + else if (strategy == BTEqualStrategyNumber) + { + OpExpr *newopexpr; + Oid op; + int16 newstrategy; + + /* + * When both monotonically increasing and decreasing then the + * return value of the window function will be the same each + * time. We can simply use 'opexpr' as the run condition + * without modifying it. + */ + if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH) + { + *keep_original = false; + runopexpr = opexpr; + break; + } + + /* + * When monotonically increasing we make a qual with + * >= in order to filter out values which are above + * the value in the equality condition. For monotonically + * decreasing we want to filter values below the value in the + * equality condition. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + newstrategy = BTGreaterEqualStrategyNumber; + else + newstrategy = BTLessEqualStrategyNumber; + + op = get_opfamily_member(opinfo->opfamily_id, + opinfo->oplefttype, + opinfo->oprighttype, + newstrategy); + + newopexpr = (OpExpr *) make_opclause(op, + opexpr->opresulttype, + opexpr->opretset, + otherexpr, + (Expr *) wfunc, + opexpr->opcollid, + opexpr->inputcollid); + newopexpr->opfuncid = get_opcode(op); + + *keep_original = true; + runopexpr = newopexpr; + break; + } + } + } + + if (runopexpr != NULL) + { + Expr *origexpr; + + wclause->runcondition = lappend(wclause->runcondition, runopexpr); + + /* also create a version of the qual that we can display in EXPLAIN */ + if (wfunc_left) + origexpr = make_opclause(runopexpr->opno, + runopexpr->opresulttype, + runopexpr->opretset, (Expr *) wfunc, + otherexpr, runopexpr->opcollid, + runopexpr->inputcollid); + else + origexpr = make_opclause(runopexpr->opno, + runopexpr->opresulttype, + runopexpr->opretset, + otherexpr, (Expr *) wfunc, + runopexpr->opcollid, + runopexpr->inputcollid); + + wclause->runconditionorig = lappend(wclause->runconditionorig, origexpr); + return true; + } + + /* unsupported OpExpr */ + return false; +} + +/* + * check_and_push_window_quals + * Check if 'rinfo' is a qual that can be pushed into a WindowFunc as a + * 'runcondition' qual. These, when present, cause the window function + * evaluation to stop when the condition becomes false. + * + * Returns true if the caller still must keep the original qual or false if + * the caller can safely ignore the original qual because the window function + * will use the run condition to stop at the right time. + */ +static bool +check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, + Node *clause) +{ + OpExpr *opexpr = (OpExpr *) clause; + bool keep_original = true; + Var *var1; + Var *var2; + + if (!IsA(opexpr, OpExpr)) + return true; + + if (list_length(opexpr->args) != 2) + return true; + + /* + * Check for plain Vars which reference window functions in the subquery. + * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can + * be used as a run condition to allow us to stop windowagg execution + * early. + */ + + /* Check the left side of the OpExpr */ + var1 = linitial(opexpr->args); + if (IsA(var1, Var) && var1->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + while (IsA(wfunc, RelabelType)) + wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg; + + if (IsA(wfunc, WindowFunc)) + { + WindowClause *wclause = (WindowClause *) + list_nth(subquery->windowClause, + wfunc->winref - 1); + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, + wclause, wfunc, opexpr, true, + &keep_original)) + return keep_original; + } + } + + /* and check the right side */ + var2 = lsecond(opexpr->args); + if (IsA(var2, Var) && var2->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + while (IsA(wfunc, RelabelType)) + wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg; + + if (IsA(wfunc, WindowFunc)) + { + WindowClause *wclause = (WindowClause *) + list_nth(subquery->windowClause, + wfunc->winref - 1); + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, + wclause, wfunc, opexpr, false, + &keep_original)) + return keep_original; + } + } + + return true; +} + /* * set_subquery_pathlist * Generate SubqueryScan access paths for a subquery RTE @@ -2178,19 +2550,30 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(l, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; if (!rinfo->pseudoconstant && qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo)) { - Node *clause = (Node *) rinfo->clause; - /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); } else { - /* Keep it in the upper query */ - upperrestrictlist = lappend(upperrestrictlist, rinfo); + /* + * Since we can't push the qual down into the subquery, check + * if it happens to reference a window function. If so then + * it might be useful to allow the window evaluation to stop + * early. + */ + if (check_and_push_window_quals(subquery, rte, rti, clause)) + { + /* + * It's not a suitable window run condition qual or it is, + * but the original must also be kept in the upper query. + */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } } } rel->baserestrictinfo = upperrestrictlist; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index a5f6d678cc..642e088f7e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -286,6 +286,7 @@ static WindowAgg *make_windowagg(List *tlist, Index winref, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, + List *runcondition, List *runconditionorig, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, @@ -2619,6 +2620,8 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->inRangeColl, wc->inRangeAsc, wc->inRangeNullsFirst, + wc->runcondition, + wc->runconditionorig, subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6477,7 +6480,7 @@ make_windowagg(List *tlist, Index winref, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, - Plan *lefttree) + List *runcondition, List *runconditionorig, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); Plan *plan = &node->plan; @@ -6494,6 +6497,8 @@ make_windowagg(List *tlist, Index winref, node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; + node->runcondition = runcondition; + node->runconditionorig = runconditionorig; node->startInRangeFunc = startInRangeFunc; node->endInRangeFunc = endInRangeFunc; node->inRangeColl = inRangeColl; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index e50624c465..a925aa3d83 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -870,6 +870,14 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_expr(root, wplan->startOffset, rtoffset, 1); wplan->endOffset = fix_scan_expr(root, wplan->endOffset, rtoffset, 1); + wplan->runcondition = fix_scan_list(root, + wplan->runcondition, + rtoffset, + NUM_EXEC_TLIST(plan)); + wplan->runconditionorig = fix_scan_list(root, + wplan->runconditionorig, + rtoffset, + NUM_EXEC_TLIST(plan)); } break; case T_Result: diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 2168080dcc..8abd4e8598 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -25,6 +25,7 @@ #include "optimizer/optimizer.h" #include "utils/builtins.h" #include "utils/int8.h" +#include "utils/lsyscache.h" typedef struct @@ -877,6 +878,7 @@ int8dec(PG_FUNCTION_ARGS) } + /* * These functions are exactly like int8inc/int8dec but are used for * aggregates that count only non-null values. Since the functions are @@ -904,6 +906,49 @@ int8dec_any(PG_FUNCTION_ARGS) return int8dec(fcinfo); } +/* + * int8inc_support + * prosupport function for int8inc() and int8inc_any() + */ +Datum +int8inc_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + MonotonicFunction monotonic = MONOTONICFUNC_NONE; + int frameOptions = req->window_clause->frameOptions; + + /* No ORDER BY clause then all rows are peers */ + if (req->window_clause->orderClause == NIL) + monotonic = MONOTONICFUNC_BOTH; + else + { + /* + * Otherwise take into account the frame options. When the frame + * bound is the start of the window then the resulting value can + * never decrease, therefore is monotonically increasing + */ + if (frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) + monotonic |= MONOTONICFUNC_INCREASING; + + /* + * Likewise, if the frame bound is the end of the window then the + * resulting value can never increase. + */ + if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + monotonic |= MONOTONICFUNC_DECREASING; + } + + req->monotonic = monotonic; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + Datum int8larger(PG_FUNCTION_ARGS) diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index 9c127617d1..27257b7628 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "nodes/supportnodes.h" #include "utils/builtins.h" #include "windowapi.h" @@ -88,6 +89,26 @@ window_row_number(PG_FUNCTION_ARGS) PG_RETURN_INT64(curpos + 1); } +/* + * window_row_number_support + * prosupport function for window_row_number() + */ +Datum +window_row_number_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* row_number() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} /* * rank @@ -110,6 +131,27 @@ window_rank(PG_FUNCTION_ARGS) PG_RETURN_INT64(context->rank); } +/* + * window_rank_support + * prosupport function for window_rank() + */ +Datum +window_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + /* * dense_rank * Rank increases by 1 when key columns change. @@ -130,6 +172,27 @@ window_dense_rank(PG_FUNCTION_ARGS) PG_RETURN_INT64(context->rank); } +/* + * window_dense_rank_support + * prosupport function for window_dense_rank() + */ +Datum +window_dense_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* dense_rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + /* * percent_rank * return fraction between 0 and 1 inclusive, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b603700ed9..22988d4c59 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6626,11 +6626,16 @@ # count has two forms: count(any) and count(*) { oid => '2147', descr => 'number of input rows for which the input expression is not null', - proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8', - proargtypes => 'any', prosrc => 'aggregate_dummy' }, + proname => 'count', prosupport => 'int8inc_support', prokind => 'a', + proisstrict => 'f', prorettype => 'int8', proargtypes => 'any', + prosrc => 'aggregate_dummy' }, { oid => '2803', descr => 'number of input rows', - proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8', - proargtypes => '', prosrc => 'aggregate_dummy' }, + proname => 'count', prosupport => 'int8inc_support', prokind => 'a', + proisstrict => 'f', prorettype => 'int8', proargtypes => '', + prosrc => 'aggregate_dummy' }, +{ oid => '8802', descr => 'planner support for count run condition', + proname => 'int8inc_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'int8inc_support' }, { oid => '2718', descr => 'population variance of bigint input values (square of the population standard deviation)', @@ -10064,14 +10069,26 @@ # SQL-spec window functions { oid => '3100', descr => 'row number within partition', - proname => 'row_number', prokind => 'w', proisstrict => 'f', - prorettype => 'int8', proargtypes => '', prosrc => 'window_row_number' }, + proname => 'row_number', prosupport => 'window_row_number_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', + proargtypes => '', prosrc => 'window_row_number' }, +{ oid => '8799', descr => 'planner support for row_number run condition', + proname => 'window_row_number_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_row_number_support' }, { oid => '3101', descr => 'integer rank with gaps', - proname => 'rank', prokind => 'w', proisstrict => 'f', prorettype => 'int8', + proname => 'rank', prosupport => 'window_rank_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '', prosrc => 'window_rank' }, +{ oid => '8800', descr => 'planner support for rank run condition', + proname => 'window_rank_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_rank_support' }, { oid => '3102', descr => 'integer rank without gaps', - proname => 'dense_rank', prokind => 'w', proisstrict => 'f', - prorettype => 'int8', proargtypes => '', prosrc => 'window_dense_rank' }, + proname => 'dense_rank', prosupport => 'window_dense_rank_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '', + prosrc => 'window_dense_rank' }, +{ oid => '8801', descr => 'planner support for dense rank run condition', + proname => 'window_dense_rank_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_dense_rank_support' }, { oid => '3103', descr => 'fractional rank within partition', proname => 'percent_rank', prokind => 'w', proisstrict => 'f', prorettype => 'float8', proargtypes => '', prosrc => 'window_percent_rank' }, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 37cb4f3d59..1e2befd797 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2408,6 +2408,10 @@ typedef struct WindowAggState MemoryContext curaggcontext; /* current aggregate's working data */ ExprContext *tmpcontext; /* short-term evaluation context */ + ExprState *runcondition; /* Condition which must remain true otherwise + * execution of the WindowAgg will finish, or + * NULL if there is no such condition. */ + bool all_first; /* true if the scan is starting */ bool all_done; /* true if the scan is finished */ bool partition_spooled; /* true if all tuples in current partition diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 6a4d82f0a8..9c4799d3e1 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -526,7 +526,8 @@ typedef enum NodeTag T_SupportRequestSelectivity, /* in nodes/supportnodes.h */ T_SupportRequestCost, /* in nodes/supportnodes.h */ T_SupportRequestRows, /* in nodes/supportnodes.h */ - T_SupportRequestIndexCondition /* in nodes/supportnodes.h */ + T_SupportRequestIndexCondition, /* in nodes/supportnodes.h */ + T_SupportRequestWFuncMonotonic /* in nodes/supportnodes.h */ } NodeTag; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index e28248af32..b76e3106dd 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1381,6 +1381,8 @@ typedef struct WindowClause int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ + List *runcondition; /* Exec WindowAgg while this is true */ + List *runconditionorig; /* EXPLAIN compatible version of above */ Oid startInRangeFunc; /* in_range function for startOffset */ Oid endInRangeFunc; /* in_range function for endOffset */ Oid inRangeColl; /* collation for in_range tests */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index ec9a8b0c81..1c71e6db41 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -893,6 +893,9 @@ typedef struct WindowAgg int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ + List *runcondition; /* Conditions that must remain true in order + * for execution to continue */ + List *runconditionorig; /* runcondition for display in EXPLAIN */ /* these fields are used with RANGE offset PRECEDING/FOLLOWING: */ Oid startInRangeFunc; /* in_range function for startOffset */ Oid endInRangeFunc; /* in_range function for endOffset */ @@ -1291,4 +1294,21 @@ typedef struct PlanInvalItem uint32 hashValue; /* hash value of object's cache lookup key */ } PlanInvalItem; +/* + * MonotonicFunction + * + * Allows the planner to track monotonic properties of functions. A function + * is monotonically increasing if a subsequent call cannot yield a lower value + * than the previous call. A monotonically decreasing function cannot yield a + * higher value, and a function which is both must return the same value on + * each call. + */ +typedef enum MonotonicFunction +{ + MONOTONICFUNC_NONE = 0, + MONOTONICFUNC_INCREASING = (1 << 0), + MONOTONICFUNC_DECREASING = (1 << 1), + MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING +} MonotonicFunction; + #endif /* PLANNODES_H */ diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 85e1b8a832..33718332c9 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -33,12 +33,12 @@ #ifndef SUPPORTNODES_H #define SUPPORTNODES_H -#include "nodes/primnodes.h" +#include "nodes/plannodes.h" struct PlannerInfo; /* avoid including pathnodes.h here */ struct IndexOptInfo; struct SpecialJoinInfo; - +struct WindowClause; /* * The Simplify request allows the support function to perform plan-time @@ -239,4 +239,58 @@ typedef struct SupportRequestIndexCondition * equivalent of the function call */ } SupportRequestIndexCondition; +/* ---------- + * To allow more efficient execution of any monotonically increasing and/or + * monotonically decreasing window functions, we support calling the window + * function's prosupport function passing along this struct whenever the + * planner sees an OpExpr qual directly reference a window function in a + * subquery. When the planner encounters this, we populate this struct and + * pass it along to the window function's prosupport function so that it can + * evaluate if the given WindowFunc is; + * + * a) monotonically increasing, or + * b) monotonically decreasing, or + * c) both monotonically increasing and decreasing, or + * d) none of the above. + * + * A function that is monotonically increasing can never return a value that + * is lower than a value returned in a "previous call". A monotonically + * decreasing function can never return a value higher than a value returned + * in a previous call. A function which is both must return the same value + * each time. + * + * We define "previous call" to mean a previous call to the same WindowFunc + * struct in the same window partition. + * + * row_number() is an example of a monotonically increasing function. The + * return value will be reset back to 1 in each new partition. An example of + * a monotonically decreasing function is count(*) over (). Since there is + * no ORDER BY clause in this example, all rows in the partition are peers and + * all rows within the partition will be within the frame bound. Likewise for + * count(*) over(order by a rows between unbounded preceding and unbounded + * following). + * + * Inputs: + * 'window_func' is the pointer to the window function being called. + * + * 'window_clause' pointer to the WindowClause data. Support functions can + * use this to check frame bounds and partition by clauses, etc. + * + * Outputs: + * 'monotonic' the resulting MonotonicFunction value for the given input + * window function and window clause. + * ---------- + */ +typedef struct SupportRequestWFuncMonotonic +{ + NodeTag type; + + /* Input fields: */ + WindowFunc *window_func; /* Pointer to the window function data */ + struct WindowClause *window_clause; /* Pointer to the window clause data */ + + /* Output fields: */ + MonotonicFunction monotonic; +} SupportRequestWFuncMonotonic; + #endif /* SUPPORTNODES_H */ diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index bb9ff7f07b..7c343dc3e6 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3336,6 +3336,275 @@ WHERE depname = 'sales'; -> Seq Scan on empsalary (9 rows) +-- Test window function run conditions are properly pushed down into the +-- WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + QUERY PLAN +---------------------------------------------- + WindowAgg + Run Condition: (row_number() OVER (?) < 3) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(5 rows) + +-- The following 3 statements should result the same result. +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 3 > rn; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 2 >= rn; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +-- Ensure r <= 3 is pushed down into the run condition of the window agg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + QUERY PLAN +----------------------------------------- + WindowAgg + Run Condition: (rank() OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + empno | salary | r +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + QUERY PLAN +----------------------------------------------------- + Subquery Scan on emp + Filter: (emp.dr = 1) + -> WindowAgg + Run Condition: (dense_rank() OVER (?) <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(7 rows) + +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + empno | salary | dr +-------+--------+---- + 8 | 6000 | 1 +(1 row) + +-- Check COUNT() and COUNT(*) +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +------------------------------------------- + WindowAgg + Run Condition: (count(*) OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + empno | salary | c +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 3 + 11 | 5200 | 3 +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +--------------------------------------------------------- + WindowAgg + Run Condition: (count(empsalary.empno) OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + empno | salary | c +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 3 + 11 | 5200 | 3 +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c >= 3; + QUERY PLAN +------------------------------------------- + WindowAgg + Run Condition: (count(*) OVER (?) >= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER () c + FROM empsalary) emp +WHERE 11 <= c; + QUERY PLAN +-------------------------------------------- + WindowAgg + Run Condition: (11 <= count(*) OVER (?)) + -> Seq Scan on empsalary +(3 rows) + +-- Tests to ensure we don't push down the run condition when it's not valid to +-- do so. +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on emp + Filter: (emp.rn < 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +------------------------------------------------------------------ + Subquery Scan on emp + Filter: (emp.c <= 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +----------------------------------------------- + Subquery Scan on emp + Filter: (emp.c <= 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary) c + FROM empsalary) emp +WHERE 3 <= c; + QUERY PLAN +------------------------------------------ + Subquery Scan on emp + Filter: (3 <= emp.c) + -> WindowAgg + -> Sort + Sort Key: empsalary.salary + -> Seq Scan on empsalary +(6 rows) + -- Test Sort node collapsing EXPLAIN (COSTS OFF) SELECT * FROM diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 41a8e0d152..c746d5343a 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -988,6 +988,146 @@ SELECT * FROM FROM empsalary) emp WHERE depname = 'sales'; +-- Test window function run conditions are properly pushed down into the +-- WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +-- The following 3 statements should result the same result. +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 3 > rn; + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 2 >= rn; + +-- Ensure r <= 3 is pushed down into the run condition of the window agg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + +-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + +-- Check COUNT() and COUNT(*) +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c >= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER () c + FROM empsalary) emp +WHERE 11 <= c; + +-- Tests to ensure we don't push down the run condition when it's not valid to +-- do so. +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary) c + FROM empsalary) emp +WHERE 3 <= c; + -- Test Sort node collapsing EXPLAIN (COSTS OFF) SELECT * FROM