diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 3162980..ffe7120 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -684,7 +684,7 @@ initialize_aggregates(AggState *aggstate, */ static void advance_transition_function(AggState *aggstate, -AggStatePerTrans pertrans, + AggStatePerTrans pertrans, AggStatePerGroup pergroupstate) { FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo; @@ -993,7 +993,7 @@ process_ordered_aggregate_single(AggState *aggstate, */ static void process_ordered_aggregate_multi(AggState *aggstate, -AggStatePerTrans pertrans, + AggStatePerTrans pertrans, AggStatePerGroup pergroupstate) { MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; @@ -1257,8 +1257,8 @@ finalize_aggregates(AggState *aggstate, for (aggno = 0; aggno < aggstate->numaggs; aggno++) { - AggStatePerAgg peragg = &peraggs[aggno]; - int transno = peragg->transno; + AggStatePerAgg peragg = &peraggs[aggno]; + int transno = peragg->transno; AggStatePerTrans pertrans = &aggstate->pertrans[transno]; AggStatePerGroup pergroupstate; @@ -2306,9 +2306,41 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->pergroup = pergroup; } - /* + /* ----------------- * Perform lookups of aggregate function info, and initialize the * unchanging fields of the per-agg and per-trans data. + * + * Here we perform optimization in the form of 'merging' duplicate + * aggregate functions so that their state and final values are re-used + * rather than needlessly being re-calculated independently. We also + * perform a 'semi-merge' of aggregates which can share the same transition + * state as another aggregate, but cannot share the same peragg due to + * having different final functions. + * + * Scenarios: + * + * 1. An aggregate function appears more than once in query: + * + * SELECT SUM(x) FROM ... HAVING SUM(x) > 0 + * + * Since in this case the aggregates are both the same we can optimize by + * only calculating aggregate state and calling the final function just + * once. In this case both aggregates will share the same 'aggno' value. + * + * 2. Two different aggregate functions appear in the query but the two + * functions happen to share the same transition function and initial + * value, but have different final functions. + * + * SELECT SUM(x), AVG(x) FROM ... + * + * In this case we must create a new peragg for the varying aggregate, but + * since the transition function and initial value are the same, both + * aggregate functions may share the same transition state. + * + * For either of these optimizations to be valid the aggregate parameters + * mustn't contain any volatile functions and must be exactly the same, + * including any modifiers such as ORDER BY, DISTINCT and FILTER. + * ----------------- */ aggno = -1; transno = -1; @@ -2338,17 +2370,17 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* Planner should have assigned aggregate to correct level */ Assert(aggref->agglevelsup == 0); - /* - * For performance reasons we detect duplicate aggregates (for - * example, "SELECT sum(x) ... HAVING sum(x) > 0"). When duplicates - * are detected, we only make an AggStatePerAgg struct for the first - * one. The clones are simply pointed at the same result entry by - * giving them duplicate aggno values. - */ + /* 1. check for already processed aggs which can be re-used */ existing_aggno = find_compatible_peragg(aggref, aggstate, aggno, &same_input_transnos); if (existing_aggno != -1) { + /* + * existing compatible agg found, just reuse the existing one for + * this aggregate. The existing one is already initialized, so the + * only thing we need to setup is to point it to the existing + * aggregate's aggno which it should use. + */ aggrefstate->aggno = existing_aggno; continue; } @@ -2464,11 +2496,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) initValue = GetAggInitVal(textInitVal, aggtranstype); /* - * Build working state for invoking the transition function, or look + * 2. Build working state for invoking the transition function, or look * up previously initialized working state, if we can share it. * * find_compatible_peragg() already collected a list of per-Trans's - * with the same inputs. Check if any of them have the transition + * with the same inputs. Check if any of them have the same transition * function and initial value. */ existing_transno = find_compatible_pertrans(aggstate, aggref, @@ -2477,6 +2509,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) same_input_transnos); if (existing_transno != -1) { + /* + * existing compatible trans found, just reuse the existing one for + * this aggregate .The existing one is already initialized, so the + * only thing we need to setup is to point it to the existing + * 'transno'. + */ pertrans = &pertransstates[existing_transno]; peragg->transno = existing_transno; } @@ -2757,60 +2795,14 @@ GetAggInitVal(Datum textInitVal, Oid transtype) /* * find_compatible_peragg - search for a previously initialized per-Agg struct * - * Searches the previously looked at aggregates in order to find a compatible - * aggregate. If a positive match is found then foundaggno is set to the - * aggregate which matches. + * Searches the previously looked at aggregates to find one which is compatible + * with this one, with the same input parameters. + * When an compatible aggregate cannot be found -1 is returned. * * As a side-effect, this also collects a list of existing per-Trans structs * with matching inputs. If no identical AggRef is found, the list is passed * later to find_compatible_perstate, to see if we can at least reuse the * state value of another aggregate. - * - * FIXME: The below examples are a good, but they don't belong here anymore. - * - * Scenario 1 -- An aggregate function appears more than once in query: - * - * SELECT SUM(x) FROM ... HAVING SUM(x) > 0 - * - * Since in this case the aggregates are both the same we can optimize by - * only calculating aggregate state and calling the finalfn just once. This - * would be an AGGREF_EXACT_MATCH, meaning both the state and the final - * function call are shared. - * - * Scenario 2 -- Two different aggregate functions appear in the query but - * the two functions happen to share the same transfn, but have - * different finalfn. - * - * SELECT SUM(x), AVG(x) FROM ... - * - * Since in our case these two aggregates both share the same transfn, but - * naturally they have different finalfns. This situation is classed as an - * AGGREF_STATE_MATCH. This means that the same state can be shared by both - * aggregates. Since the finalfn call is not the same this cannot be reused. - * For this case to be valid the INITCOND of the aggregate, if one exists, must - * also match. - * - * Scenario 3 -- The same aggregate function is called with different - * parameters. - * - * SELECT SUM(x),SUM(DISTINCT x) FROM ... - * SELECT SUM(x),SUM(y) FROM ... - * SELECT SUM(x),SUM(x) FILTER(WHERE x > 0) FROM ... - * - * All three of the above queries cannot share the same state and have to be - * calculated independently. - * - * Scenario 4 -- Different aggregates with the same parameters and the same - * transfn and finalfn. - * - * SELECT SUM(x),SUM2(x) FROM ... - * - * A perhaps unlikely scenario where two aggregate functions exist which have, - * both the same transfn and the same finalfn. In this case we can report an - * AGGREF_EXACT_MATCH, providing the INITCOND of both aggregates are the same. - * - * - * Returns -1 if no match found. */ static int find_compatible_peragg(Aggref *newagg, AggState *aggstate, @@ -2828,10 +2820,13 @@ find_compatible_peragg(Aggref *newagg, AggState *aggstate, peraggs = aggstate->peragg; /* - * Search through the list of already seen aggregates. We'll stop when we - * find an exact match, but until then we'll note any state matches that - * we find. We may have to fall back on these should we fail to find an - * exact match. + * Search through the list of already seen aggregates. If we find an + * existing aggregate with the same aggregate function and input parameters + * as an existing one, then we can re-use that one. While searching we'll + * collect a list of Aggrefs with the same input parameters. The caller may + * use these when a matching Aggref cannot be found. Potentially this list + * could contain a transno which this aggregate can re-use the transition + * state from. */ for (aggno = 0; aggno <= lastaggno; aggno++) { diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 7091a9d..5796de8 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1837,7 +1837,7 @@ typedef struct AggState int current_phase; /* current phase number */ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ - AggStatePerTrans pertrans; /* per-Agg trans state information */ + AggStatePerTrans pertrans; /* per-Trans state information */ ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ AggStatePerTrans curpertrans; /* currently active trans state */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 4dad4fe..de826b5 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1636,8 +1636,17 @@ create aggregate my_sum(int4) sfunc = avg_transfn, finalfunc = sum_finalfn ); +-- aggregate state should be shared as aggs are the same. +select my_avg(one),my_avg(one) from (values(1),(3)) t(one); +NOTICE: avg_transfn called with 1 +NOTICE: avg_transfn called with 3 + my_avg | my_avg +--------+-------- + 2 | 2 +(1 row) + -- aggregate state should be shared as transfn is the same for both aggs. -select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two); +select my_avg(one),my_sum(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 NOTICE: avg_transfn called with 3 my_avg | my_sum @@ -1646,7 +1655,7 @@ NOTICE: avg_transfn called with 3 (1 row) -- shouldn't share states due to the distinctness not matching. -select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two); +select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 NOTICE: avg_transfn called with 3 NOTICE: avg_transfn called with 1 @@ -1656,6 +1665,16 @@ NOTICE: avg_transfn called with 3 2 | 4 (1 row) +-- shouldn't share states due to the filter clause not matching. +select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one); +NOTICE: avg_transfn called with 1 +NOTICE: avg_transfn called with 3 +NOTICE: avg_transfn called with 3 + my_avg | my_sum +--------+-------- + 3 | 4 +(1 row) + -- this should not share the state due to different input columns. select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two); NOTICE: avg_transfn called with 2 @@ -1667,6 +1686,7 @@ NOTICE: avg_transfn called with 3 2 | 6 (1 row) +-- test that aggs with the same sfunc and initcond share the same agg state create aggregate my_sum_init(int4) ( stype = avg_state, @@ -1679,17 +1699,33 @@ create aggregate my_avg_init(int4) stype = avg_state, sfunc = avg_transfn, finalfunc = avg_finalfn, - initcond = '(5,0)' + initcond = '(10,0)' +); +create aggregate my_avg_init2(int4) +( + stype = avg_state, + sfunc = avg_transfn, + finalfunc = avg_finalfn, + initcond = '(4,0)' ); +-- state should be shared if INITCONDs are matching +select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one); +NOTICE: avg_transfn called with 1 +NOTICE: avg_transfn called with 3 + my_sum_init | my_avg_init +-------------+------------- + 14 | 7 +(1 row) + -- Varying INITCONDs should cause the states not to be shared. -select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two); +select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 NOTICE: avg_transfn called with 1 NOTICE: avg_transfn called with 3 NOTICE: avg_transfn called with 3 - my_avg_init | my_sum_init --------------+------------- - 4 | 14 + my_sum_init | my_avg_init2 +-------------+-------------- + 14 | 4 (1 row) rollback; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 42c3b3c..8d501dc 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -655,16 +655,22 @@ create aggregate my_sum(int4) finalfunc = sum_finalfn ); +-- aggregate state should be shared as aggs are the same. +select my_avg(one),my_avg(one) from (values(1),(3)) t(one); + -- aggregate state should be shared as transfn is the same for both aggs. -select my_avg(one),my_sum(one) from (values(1,2),(3,4)) t(one,two); +select my_avg(one),my_sum(one) from (values(1),(3)) t(one); -- shouldn't share states due to the distinctness not matching. -select my_avg(distinct one),my_sum(one) from (values(1,2),(3,4)) t(one,two); +select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one); + +-- shouldn't share states due to the filter clause not matching. +select my_avg(one) filter (where one > 1),my_sum(one) from (values(1),(3)) t(one); -- this should not share the state due to different input columns. select my_avg(one),my_sum(two) from (values(1,2),(3,4)) t(one,two); - +-- test that aggs with the same sfunc and initcond share the same agg state create aggregate my_sum_init(int4) ( stype = avg_state, @@ -678,11 +684,22 @@ create aggregate my_avg_init(int4) stype = avg_state, sfunc = avg_transfn, finalfunc = avg_finalfn, - initcond = '(5,0)' + initcond = '(10,0)' ); +create aggregate my_avg_init2(int4) +( + stype = avg_state, + sfunc = avg_transfn, + finalfunc = avg_finalfn, + initcond = '(4,0)' +); + +-- state should be shared if INITCONDs are matching +select my_sum_init(one),my_avg_init(one) from (values(1),(3)) t(one); + -- Varying INITCONDs should cause the states not to be shared. -select my_avg_init(one),my_sum_init(one) from (values(1,2),(3,4)) t(one,two); +select my_sum_init(one),my_avg_init2(one) from (values(1),(3)) t(one); rollback;