Parallel Aggregate costs don't consider combine/serial/deserial funcs
Hi,
I realised a few days ago that the parallel aggregate code does not
cost for the combine, serialisation and deserialisation functions at
all.
I've attached a patch which fixes this.
One small point which I was a little unsure of in the attached is,
should the "if (aggref->aggdirectargs)" part of
count_agg_clauses_walker() be within the "if
(!context->combineStates)". I simply couldn't decide. We currently
have no aggregates which this affects anyway, per; select * from
pg_aggregate where aggcombinefn <> 0 and aggkind <> 'n'; so for now
I've left it outwith.
Another thing I thought of is that it's not too nice that I have to
pass 3 bools to count_agg_clauses() in order to tell it what to do. I
was tempted to invent some bitmask flags for this, then modify
create_agg_path() to use the same flags, but I thought I'd better not
cause too much churn with this patch.
--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
Attachments:
parallel_aggs_costs_fix.patchapplication/octet-stream; name=parallel_aggs_costs_fix.patchDownload
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b2a9a80..7376175 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3257,6 +3257,8 @@ create_grouping_paths(PlannerInfo *root,
RelOptInfo *grouped_rel;
PathTarget *partial_grouping_target = NULL;
AggClauseCosts agg_costs;
+ AggClauseCosts agg_partial_costs; /* parallel only */
+ AggClauseCosts agg_final_costs; /* parallel only */
Size hashaggtablesize;
double dNumGroups;
double dNumPartialGroups = 0;
@@ -3341,8 +3343,10 @@ create_grouping_paths(PlannerInfo *root,
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
if (parse->hasAggs)
{
- count_agg_clauses(root, (Node *) target->exprs, &agg_costs);
- count_agg_clauses(root, parse->havingQual, &agg_costs);
+ count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true,
+ false, false);
+ count_agg_clauses(root, parse->havingQual, &agg_costs, true, false,
+ false);
}
/*
@@ -3417,6 +3421,25 @@ create_grouping_paths(PlannerInfo *root,
NIL,
NIL);
+ /*
+ * Collect statistics about aggregates for estimating costs of
+ * performing aggregation in parallel.
+ */
+ MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
+ MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
+ if (parse->hasAggs)
+ {
+ /* partial phase */
+ count_agg_clauses(root, (Node *) partial_grouping_target->exprs,
+ &agg_partial_costs, false, false, true);
+
+ /* final phase */
+ count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs,
+ true, true, true);
+ count_agg_clauses(root, parse->havingQual, &agg_final_costs, true,
+ true, true);
+ }
+
if (can_sort)
{
/* Checked in set_grouped_rel_consider_parallel() */
@@ -3452,7 +3475,7 @@ create_grouping_paths(PlannerInfo *root,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
parse->groupClause,
NIL,
- &agg_costs,
+ &agg_partial_costs,
dNumPartialGroups,
false,
false,
@@ -3477,7 +3500,7 @@ create_grouping_paths(PlannerInfo *root,
hashaggtablesize =
estimate_hashagg_tablesize(cheapest_partial_path,
- &agg_costs,
+ &agg_partial_costs,
dNumPartialGroups);
/*
@@ -3494,7 +3517,7 @@ create_grouping_paths(PlannerInfo *root,
AGG_HASHED,
parse->groupClause,
NIL,
- &agg_costs,
+ &agg_partial_costs,
dNumPartialGroups,
false,
false,
@@ -3626,7 +3649,7 @@ create_grouping_paths(PlannerInfo *root,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
parse->groupClause,
(List *) parse->havingQual,
- &agg_costs,
+ &agg_final_costs,
dNumGroups,
true,
true,
@@ -3686,7 +3709,7 @@ create_grouping_paths(PlannerInfo *root,
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
hashaggtablesize = estimate_hashagg_tablesize(path,
- &agg_costs,
+ &agg_final_costs,
dNumGroups);
if (hashaggtablesize < work_mem * 1024L)
@@ -3708,7 +3731,7 @@ create_grouping_paths(PlannerInfo *root,
AGG_HASHED,
parse->groupClause,
(List *) parse->havingQual,
- &agg_costs,
+ &agg_final_costs,
dNumGroups,
true,
true,
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 5674a73..759566a 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -61,6 +61,9 @@ typedef struct
{
PlannerInfo *root;
AggClauseCosts *costs;
+ bool finalizeAggs;
+ bool combineStates;
+ bool serialStates;
} count_agg_clauses_context;
typedef struct
@@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context)
* are no subqueries. There mustn't be outer-aggregate references either.
*/
void
-count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs)
+count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs,
+ bool finalizeAggs, bool combineStates, bool serialStates)
{
count_agg_clauses_context context;
context.root = root;
context.costs = costs;
+ context.finalizeAggs = finalizeAggs;
+ context.combineStates = combineStates;
+ context.serialStates = serialStates;
(void) count_agg_clauses_walker(clause, &context);
}
@@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
Form_pg_aggregate aggform;
Oid aggtransfn;
Oid aggfinalfn;
+ Oid aggcombinefn;
+ Oid aggserialfn;
+ Oid aggdeserialfn;
Oid aggtranstype;
int32 aggtransspace;
QualCost argcosts;
@@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
aggtransfn = aggform->aggtransfn;
aggfinalfn = aggform->aggfinalfn;
+ aggcombinefn = aggform->aggcombinefn;
+ aggserialfn = aggform->aggserialfn;
+ aggdeserialfn = aggform->aggdeserialfn;
aggtranstype = aggform->aggtranstype;
aggtransspace = aggform->aggtransspace;
ReleaseSysCache(aggTuple);
@@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
costs->numOrderedAggs++;
- /* add component function execution costs to appropriate totals */
- costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
- if (OidIsValid(aggfinalfn))
- costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+ /*
+ * Add the appropriate component function execution costs to
+ * appropriate totals.
+ */
+ if (context->combineStates)
+ {
+ /* charge for combining previously aggregated states */
+ costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
- /* also add the input expressions' cost to per-input-row costs */
- cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
- costs->transCost.startup += argcosts.startup;
- costs->transCost.per_tuple += argcosts.per_tuple;
+ /* charge for deserialization, when appropriate */
+ if (context->serialStates && OidIsValid(aggdeserialfn))
+ costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+ }
+ else
+ costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+
+ if (context->finalizeAggs)
+ {
+ if (OidIsValid(aggfinalfn))
+ costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+ }
+ else if (context->serialStates)
+ {
+ if (OidIsValid(aggserialfn))
+ costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+ }
/*
- * Add any filter's cost to per-input-row costs.
- *
- * XXX Ideally we should reduce input expression costs according to
- * filter selectivity, but it's not clear it's worth the trouble.
+ * Some costs will already have been incurred by the initial aggregate
+ * node, so we mustn't include these again.
*/
- if (aggref->aggfilter)
+ if (!context->combineStates)
{
- cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
- context->root);
+ /* add the input expressions' cost to per-input-row costs */
+ cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
costs->transCost.startup += argcosts.startup;
costs->transCost.per_tuple += argcosts.per_tuple;
+
+ /*
+ * Add any filter's cost to per-input-row costs.
+ *
+ * XXX Ideally we should reduce input expression costs according
+ * to filter selectivity, but it's not clear it's worth the
+ * trouble.
+ */
+ if (aggref->aggfilter)
+ {
+ cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
+ context->root);
+ costs->transCost.startup += argcosts.startup;
+ costs->transCost.per_tuple += argcosts.per_tuple;
+ }
}
/*
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 3ab57f1..1eb1eb4 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause);
extern PartialAggType aggregates_allow_partial(Node *clause);
extern bool contain_agg_clause(Node *clause);
extern void count_agg_clauses(PlannerInfo *root, Node *clause,
- AggClauseCosts *costs);
+ AggClauseCosts *costs, bool finalizeAggs,
+ bool combineStates, bool serialStates);
extern bool contain_window_function(Node *clause);
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
On Mon, Apr 11, 2016 at 12:47:29AM +1200, David Rowley wrote:
Hi,
I realised a few days ago that the parallel aggregate code does not
cost for the combine, serialisation and deserialisation functions at
all.I've attached a patch which fixes this.
One small point which I was a little unsure of in the attached is,
should the "if (aggref->aggdirectargs)" part of
count_agg_clauses_walker() be within the "if
(!context->combineStates)". I simply couldn't decide. We currently
have no aggregates which this affects anyway, per; select * from
pg_aggregate where aggcombinefn <> 0 and aggkind <> 'n'; so for now
I've left it outwith.Another thing I thought of is that it's not too nice that I have to
pass 3 bools to count_agg_clauses() in order to tell it what to do. I
was tempted to invent some bitmask flags for this, then modify
create_agg_path() to use the same flags, but I thought I'd better not
cause too much churn with this patch.
[This is a generic notification.]
The above-described topic is currently a PostgreSQL 9.6 open item. Robert,
since you committed the patch believed to have created it, you own this open
item. If that responsibility lies elsewhere, please let us know whose
responsibility it is to fix this. Since new open items may be discovered at
any time and I want to plan to have them all fixed well in advance of the ship
date, I will appreciate your efforts toward speedy resolution. Please
present, within 72 hours, a plan to fix the defect within seven days of this
message. Thanks.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sun, Apr 10, 2016 at 8:47 AM, David Rowley
<david.rowley@2ndquadrant.com> wrote:
I realised a few days ago that the parallel aggregate code does not
cost for the combine, serialisation and deserialisation functions at
all.
Oops.
I've attached a patch which fixes this.
I've committed this patch. I wonder if it's going to produce compiler
warnings for some people, complaining about possible use of an
uninitialized variable. That would kind of suck. I don't much mind
having to insert a dummy assignment to shut the compiler up; a smarter
compiler will just throw it out anyway. I'm less enthused about a
dummy MemSet. The compiler is less likely to be able to get rid of
that, and it's more expensive if it doesn't. But let's see what
happens.
One small point which I was a little unsure of in the attached is,
should the "if (aggref->aggdirectargs)" part of
count_agg_clauses_walker() be within the "if
(!context->combineStates)". I simply couldn't decide. We currently
have no aggregates which this affects anyway, per; select * from
pg_aggregate where aggcombinefn <> 0 and aggkind <> 'n'; so for now
I've left it outwith.
The direct arguments would be evaluated in the worker, but not in the
leader, right? Or am I confused?
Another thing I thought of is that it's not too nice that I have to
pass 3 bools to count_agg_clauses() in order to tell it what to do. I
was tempted to invent some bitmask flags for this, then modify
create_agg_path() to use the same flags, but I thought I'd better not
cause too much churn with this patch.
I'm kinda tempted to say this should be using an enum. I note that
serialStates has a subtly different meaning here than in some other
places where you have used the same term.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 13 April 2016 at 08:52, Robert Haas <robertmhaas@gmail.com> wrote:
On Sun, Apr 10, 2016 at 8:47 AM, David Rowley
<david.rowley@2ndquadrant.com> wrote:I realised a few days ago that the parallel aggregate code does not
cost for the combine, serialisation and deserialisation functions at
all.Oops.
I've attached a patch which fixes this.
I've committed this patch. I wonder if it's going to produce compiler
warnings for some people, complaining about possible use of an
uninitialized variable. That would kind of suck. I don't much mind
having to insert a dummy assignment to shut the compiler up; a smarter
compiler will just throw it out anyway. I'm less enthused about a
dummy MemSet. The compiler is less likely to be able to get rid of
that, and it's more expensive if it doesn't. But let's see what
happens.
Thanks for committing.
I wondered that too, so checked a couple of compilers and got no
warnings, but the buildfarm should let us know. The other option would
be to palloc() them, and have them set to NULL initially... that's not
very nice either... Another option would be to protect the final
parallel path generation with if (grouped_rel->partial_pathlist &&
grouped_rel->consider_parallel). I'd imagine any compiler smart enough
to work out that uninitialised is not possible would also be able to
remove the check for grouped_rel->consider_parallel, but *shrug*, I
don't often look at the assembly that compilers generate, so I might
be giving them too much credit.
One small point which I was a little unsure of in the attached is,
should the "if (aggref->aggdirectargs)" part of
count_agg_clauses_walker() be within the "if
(!context->combineStates)". I simply couldn't decide. We currently
have no aggregates which this affects anyway, per; select * from
pg_aggregate where aggcombinefn <> 0 and aggkind <> 'n'; so for now
I've left it outwith.The direct arguments would be evaluated in the worker, but not in the
leader, right? Or am I confused?
That seems right, but I just can't think of how its possible to
parallelise these aggregates anyway.
Another thing I thought of is that it's not too nice that I have to
pass 3 bools to count_agg_clauses() in order to tell it what to do. I
was tempted to invent some bitmask flags for this, then modify
create_agg_path() to use the same flags, but I thought I'd better not
cause too much churn with this patch.I'm kinda tempted to say this should be using an enum. I note that
serialStates has a subtly different meaning here than in some other
places where you have used the same term.
hmm, I'm not sure how it's subtly different. Do you mean the
preference towards costing the finalfn when finalizeAggs is true, and
ignoring the serialfn in this case? nodeAgg.c should do the same,
although it'll deserialize in such a case. We can never finalize and
serialize in the same node.
--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Tue, Apr 12, 2016 at 5:38 PM, David Rowley
<david.rowley@2ndquadrant.com> wrote:
One small point which I was a little unsure of in the attached is,
should the "if (aggref->aggdirectargs)" part of
count_agg_clauses_walker() be within the "if
(!context->combineStates)". I simply couldn't decide. We currently
have no aggregates which this affects anyway, per; select * from
pg_aggregate where aggcombinefn <> 0 and aggkind <> 'n'; so for now
I've left it outwith.The direct arguments would be evaluated in the worker, but not in the
leader, right? Or am I confused?That seems right, but I just can't think of how its possible to
parallelise these aggregates anyway.
Well, if you could ensure that each worker would see a whole group,
you could do it, I think. But it's probably fine to just leave this
for now. It's not like it can't be changed if somebody figures out
some cool thing to do in this area.
Another thing I thought of is that it's not too nice that I have to
pass 3 bools to count_agg_clauses() in order to tell it what to do. I
was tempted to invent some bitmask flags for this, then modify
create_agg_path() to use the same flags, but I thought I'd better not
cause too much churn with this patch.I'm kinda tempted to say this should be using an enum. I note that
serialStates has a subtly different meaning here than in some other
places where you have used the same term.hmm, I'm not sure how it's subtly different. Do you mean the
preference towards costing the finalfn when finalizeAggs is true, and
ignoring the serialfn in this case? nodeAgg.c should do the same,
although it'll deserialize in such a case. We can never finalize and
serialize in the same node.
I mean that, IIUC, in some other places where you use serialStates,
true means that (de)serialization is known to be needed. Here,
however, it only means it might be needed, contingent on whether the
serial/deserial functions are actually present.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers