From 5dc64aad99976bcdead74f1dd2376073e32898f0 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 11 Jun 2019 07:48:29 +0000 Subject: [PATCH] Implementing parallel grouping sets. Parallel aggregation is already supported in PostgreSQL and is implemented by aggregating in two stages. First, each worker performs an aggregation step, producing a partial result for each group of which that process is aware. The partial results are then transferred to the leader via the Gather node. Second, the leader merges the partial results and produces the final result for each group. We implement parallel grouping sets in the same way. The only difference is that in the final stage, the leader performs a grouping sets aggregation, rather than a normal aggregation. --- src/backend/optimizer/plan/createplan.c | 4 +- src/backend/optimizer/plan/planner.c | 129 ++++++++++++---- src/backend/optimizer/util/pathnode.c | 2 + src/include/nodes/pathnodes.h | 1 + src/include/optimizer/pathnode.h | 1 + src/test/regress/expected/parallelgroupingsets.out | 172 +++++++++++++++++++++ src/test/regress/sql/parallelgroupingsets.sql | 43 ++++++ 7 files changed, 317 insertions(+), 35 deletions(-) create mode 100644 src/test/regress/expected/parallelgroupingsets.out create mode 100644 src/test/regress/sql/parallelgroupingsets.sql diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 608d5ad..6e9dfa5 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -2245,7 +2245,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) agg_plan = (Plan *) make_agg(NIL, NIL, strat, - AGGSPLIT_SIMPLE, + best_path->aggsplit, list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), @@ -2283,7 +2283,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) plan = make_agg(build_path_tlist(root, &best_path->path), 
best_path->qual, best_path->aggstrategy, - AGGSPLIT_SIMPLE, + best_path->aggsplit, numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index cb897cc..2b6dd36 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -148,7 +148,8 @@ static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, double path_rows, grouping_sets_data *gd, - List *target_list); + List *target_list, + bool is_partial); static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, @@ -176,7 +177,8 @@ static void consider_groupingsets_paths(PlannerInfo *root, bool can_hash, grouping_sets_data *gd, const AggClauseCosts *agg_costs, - double dNumGroups); + double dNumGroups, + AggSplit aggsplit); static RelOptInfo *create_window_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *input_target, @@ -3664,6 +3666,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) * path_rows: number of output rows from scan/join step * gd: grouping sets data including list of grouping sets and their clauses * target_list: target list containing group clause references + * is_partial: whether the grouping is in partial aggregate * * If doing grouping sets, we also annotate the gsets data with the estimates * for each set and each individual rollup list, with a view to later @@ -3673,7 +3676,8 @@ static double get_number_of_groups(PlannerInfo *root, double path_rows, grouping_sets_data *gd, - List *target_list) + List *target_list, + bool is_partial) { Query *parse = root->parse; double dNumGroups; @@ -3682,7 +3686,7 @@ get_number_of_groups(PlannerInfo *root, { List *groupExprs; - if (parse->groupingSets) + if (parse->groupingSets && !is_partial) { /* Add up the estimates for each grouping set */ ListCell *lc; @@ -3745,7 +3749,7 @@ 
get_number_of_groups(PlannerInfo *root, } else { - /* Plain GROUP BY */ + /* Plain GROUP BY, or grouping is in partial aggregate */ groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -4138,7 +4142,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, dNumGroups = get_number_of_groups(root, cheapest_path->rows, gd, - extra->targetList); + extra->targetList, + false); /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, @@ -4183,7 +4188,8 @@ consider_groupingsets_paths(PlannerInfo *root, bool can_hash, grouping_sets_data *gd, const AggClauseCosts *agg_costs, - double dNumGroups) + double dNumGroups, + AggSplit aggsplit) { Query *parse = root->parse; @@ -4345,6 +4351,7 @@ consider_groupingsets_paths(PlannerInfo *root, path, (List *) parse->havingQual, strat, + aggsplit, new_rollups, agg_costs, dNumGroups)); @@ -4502,6 +4509,7 @@ consider_groupingsets_paths(PlannerInfo *root, path, (List *) parse->havingQual, AGG_MIXED, + aggsplit, rollups, agg_costs, dNumGroups)); @@ -4518,6 +4526,7 @@ consider_groupingsets_paths(PlannerInfo *root, path, (List *) parse->havingQual, AGG_SORTED, + aggsplit, gd->rollups, agg_costs, dNumGroups)); @@ -5192,7 +5201,15 @@ make_partial_grouping_target(PlannerInfo *root, foreach(lc, grouping_target->exprs) { Expr *expr = (Expr *) lfirst(lc); - Index sgref = get_pathtarget_sortgroupref(grouping_target, i); + Index sgref = get_pathtarget_sortgroupref(grouping_target, i++); + + /* + * GroupingFunc does not need to be evaluated in Partial Aggregate, + * since Partial Aggregate will not handle multiple grouping sets at + * once. 
+ */ + if (IsA(expr, GroupingFunc)) + continue; if (sgref && parse->groupClause && get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) @@ -5211,8 +5228,6 @@ make_partial_grouping_target(PlannerInfo *root, */ non_group_cols = lappend(non_group_cols, expr); } - - i++; } /* @@ -6406,7 +6421,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { consider_groupingsets_paths(root, grouped_rel, path, true, can_hash, - gd, agg_costs, dNumGroups); + gd, agg_costs, dNumGroups, AGGSPLIT_SIMPLE); } else if (parse->hasAggs) { @@ -6473,7 +6488,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, -1.0); } - if (parse->hasAggs) + /* + * parallel grouping sets + */ + if (parse->groupingSets) + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_final_costs, dNumGroups, AGGSPLIT_FINAL_DESERIAL); + else if (parse->hasAggs) add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, @@ -6508,7 +6530,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ consider_groupingsets_paths(root, grouped_rel, cheapest_path, false, true, - gd, agg_costs, dNumGroups); + gd, agg_costs, dNumGroups, AGGSPLIT_SIMPLE); } else { @@ -6556,17 +6578,27 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, dNumGroups); if (hashaggtablesize < work_mem * 1024L) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - AGG_HASHED, - AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, - havingQual, - agg_final_costs, - dNumGroups)); + { + /* + * parallel grouping sets + */ + if (parse->groupingSets) + consider_groupingsets_paths(root, grouped_rel, + path, false, true, + gd, agg_final_costs, dNumGroups, AGGSPLIT_FINAL_DESERIAL); + else + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + 
dNumGroups)); + } } } @@ -6706,13 +6738,15 @@ create_partial_grouping_paths(PlannerInfo *root, get_number_of_groups(root, cheapest_total_path->rows, gd, - extra->targetList); + extra->targetList, + true); if (cheapest_partial_path != NULL) dNumPartialPartialGroups = get_number_of_groups(root, cheapest_partial_path->rows, gd, - extra->targetList); + extra->targetList, + true); if (can_sort && cheapest_total_path != NULL) { @@ -6734,11 +6768,28 @@ create_partial_grouping_paths(PlannerInfo *root, { /* Sort the cheapest partial path, if it isn't already */ if (!is_sorted) + { + List *pathkeys; + + /* + * If we are performing Partial Aggregate for grouping + * sets, we need to sort by all the columns in + * parse->groupClause. + */ + if (parse->groupingSets) + pathkeys = + make_pathkeys_for_sortclauses(root, + parse->groupClause, + root->processed_tlist); + else + pathkeys = root->group_pathkeys; + path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + pathkeys, -1.0); + } if (parse->hasAggs) add_path(partially_grouped_rel, (Path *) @@ -6778,11 +6829,28 @@ create_partial_grouping_paths(PlannerInfo *root, { /* Sort the cheapest partial path, if it isn't already */ if (!is_sorted) + { + List *pathkeys; + + /* + * If we are performing Partial Aggregate for grouping + * sets, we need to sort by all the columns in + * parse->groupClause. + */ + if (parse->groupingSets) + pathkeys = + make_pathkeys_for_sortclauses(root, + parse->groupClause, + root->processed_tlist); + else + pathkeys = root->group_pathkeys; + path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + pathkeys, -1.0); + } if (parse->hasAggs) add_partial_path(partially_grouped_rel, (Path *) @@ -6952,11 +7020,6 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) */ return false; } - else if (parse->groupingSets) - { - /* We don't know how to do grouping sets in parallel. 
*/ - return false; - } else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial) { /* Insufficient support for partial mode. */ diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d884d2b..b5d79d2 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3014,6 +3014,7 @@ create_groupingsets_path(PlannerInfo *root, Path *subpath, List *having_qual, AggStrategy aggstrategy, + AggSplit aggsplit, List *rollups, const AggClauseCosts *agg_costs, double numGroups) @@ -3059,6 +3060,7 @@ create_groupingsets_path(PlannerInfo *root, pathnode->path.pathkeys = NIL; pathnode->aggstrategy = aggstrategy; + pathnode->aggsplit = aggsplit; pathnode->rollups = rollups; pathnode->qual = having_qual; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 4b7703d..739f279 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1693,6 +1693,7 @@ typedef struct GroupingSetsPath Path path; Path *subpath; /* path representing input source */ AggStrategy aggstrategy; /* basic strategy */ + AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ List *rollups; /* list of RollupData */ List *qual; /* quals (HAVING quals), if any */ } GroupingSetsPath; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index e70d6a3..9d912fd 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -217,6 +217,7 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root, Path *subpath, List *having_qual, AggStrategy aggstrategy, + AggSplit aggsplit, List *rollups, const AggClauseCosts *agg_costs, double numGroups); diff --git a/src/test/regress/expected/parallelgroupingsets.out b/src/test/regress/expected/parallelgroupingsets.out new file mode 100644 index 0000000..97b181e --- /dev/null +++ b/src/test/regress/expected/parallelgroupingsets.out @@ -0,0 +1,172 @@ +-- +-- grouping sets +-- +-- test data 
sources +create table gstest(c1 int, c2 int, c3 int) with (parallel_workers = 4); +insert into gstest select 1,10,100 from generate_series(1,10)i; +insert into gstest select 1,10,200 from generate_series(1,10)i; +insert into gstest select 1,20,30 from generate_series(1,10)i; +insert into gstest select 2,30,40 from generate_series(1,10)i; +insert into gstest select 2,40,50 from generate_series(1,10)i; +insert into gstest select 3,50,60 from generate_series(1,10)i; +insert into gstest select 1,NULL,0 from generate_series(1,10)i; +analyze gstest; +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET max_parallel_workers_per_gather=4; +-- test for hashagg +set enable_hashagg to on; +explain (costs off, verbose) +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)); + QUERY PLAN +------------------------------------------------------ + Finalize HashAggregate + Output: c1, c2, avg(c3) + Hash Key: gstest.c1, gstest.c2 + Hash Key: gstest.c1 + -> Gather + Output: c1, c2, (PARTIAL avg(c3)) + Workers Planned: 4 + -> Partial HashAggregate + Output: c1, c2, PARTIAL avg(c3) + Group Key: gstest.c1, gstest.c2 + -> Parallel Seq Scan on public.gstest + Output: c1, c2, c3 +(12 rows) + +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)) order by 1,2,3; + c1 | c2 | avg +----+----+------------------------ + 1 | 10 | 150.0000000000000000 + 1 | 20 | 30.0000000000000000 + 1 | | 0.00000000000000000000 + 1 | | 82.5000000000000000 + 2 | 30 | 40.0000000000000000 + 2 | 40 | 50.0000000000000000 + 2 | | 45.0000000000000000 + 3 | 50 | 60.0000000000000000 + 3 | | 60.0000000000000000 +(9 rows) + +explain (costs off, verbose) +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)); + QUERY PLAN +---------------------------------------------------------- + Finalize HashAggregate + Output: c1, c2, c3, avg(c3) + Hash Key: gstest.c1, gstest.c2 + Hash Key: gstest.c1 + Hash Key: gstest.c2, gstest.c3 + -> Gather + Output: c1, c2, 
c3, (PARTIAL avg(c3)) + Workers Planned: 4 + -> Partial HashAggregate + Output: c1, c2, c3, PARTIAL avg(c3) + Group Key: gstest.c1, gstest.c2, gstest.c3 + -> Parallel Seq Scan on public.gstest + Output: c1, c2, c3 +(13 rows) + +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)) order by 1,2,3,4; + c1 | c2 | c3 | avg +----+----+-----+------------------------ + 1 | 10 | | 150.0000000000000000 + 1 | 20 | | 30.0000000000000000 + 1 | | | 0.00000000000000000000 + 1 | | | 82.5000000000000000 + 2 | 30 | | 40.0000000000000000 + 2 | 40 | | 50.0000000000000000 + 2 | | | 45.0000000000000000 + 3 | 50 | | 60.0000000000000000 + 3 | | | 60.0000000000000000 + | 10 | 100 | 100.0000000000000000 + | 10 | 200 | 200.0000000000000000 + | 20 | 30 | 30.0000000000000000 + | 30 | 40 | 40.0000000000000000 + | 40 | 50 | 50.0000000000000000 + | 50 | 60 | 60.0000000000000000 + | | 0 | 0.00000000000000000000 +(16 rows) + +-- test for groupagg +set enable_hashagg to off; +explain (costs off, verbose) +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)); + QUERY PLAN +------------------------------------------------------------ + Finalize GroupAggregate + Output: c1, c2, avg(c3) + Group Key: gstest.c1, gstest.c2 + Group Key: gstest.c1 + -> Gather Merge + Output: c1, c2, (PARTIAL avg(c3)) + Workers Planned: 4 + -> Partial GroupAggregate + Output: c1, c2, PARTIAL avg(c3) + Group Key: gstest.c1, gstest.c2 + -> Sort + Output: c1, c2, c3 + Sort Key: gstest.c1, gstest.c2 + -> Parallel Seq Scan on public.gstest + Output: c1, c2, c3 +(15 rows) + +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)) order by 1,2,3; + c1 | c2 | avg +----+----+------------------------ + 1 | 10 | 150.0000000000000000 + 1 | 20 | 30.0000000000000000 + 1 | | 0.00000000000000000000 + 1 | | 82.5000000000000000 + 2 | 30 | 40.0000000000000000 + 2 | 40 | 50.0000000000000000 + 2 | | 45.0000000000000000 + 3 | 50 | 60.0000000000000000 + 3 | | 60.0000000000000000 +(9 
rows) + +explain (costs off, verbose) +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)); + QUERY PLAN +--------------------------------------------------------------- + Finalize GroupAggregate + Output: c1, c2, c3, avg(c3) + Group Key: gstest.c1, gstest.c2 + Group Key: gstest.c1 + Sort Key: gstest.c2, gstest.c3 + Group Key: gstest.c2, gstest.c3 + -> Gather Merge + Output: c1, c2, c3, (PARTIAL avg(c3)) + Workers Planned: 4 + -> Partial GroupAggregate + Output: c1, c2, c3, PARTIAL avg(c3) + Group Key: gstest.c1, gstest.c2, gstest.c3 + -> Sort + Output: c1, c2, c3 + Sort Key: gstest.c1, gstest.c2, gstest.c3 + -> Parallel Seq Scan on public.gstest + Output: c1, c2, c3 +(17 rows) + +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)) order by 1,2,3,4; + c1 | c2 | c3 | avg +----+----+-----+------------------------ + 1 | 10 | | 150.0000000000000000 + 1 | 20 | | 30.0000000000000000 + 1 | | | 0.00000000000000000000 + 1 | | | 82.5000000000000000 + 2 | 30 | | 40.0000000000000000 + 2 | 40 | | 50.0000000000000000 + 2 | | | 45.0000000000000000 + 3 | 50 | | 60.0000000000000000 + 3 | | | 60.0000000000000000 + | 10 | 100 | 100.0000000000000000 + | 10 | 200 | 200.0000000000000000 + | 20 | 30 | 30.0000000000000000 + | 30 | 40 | 40.0000000000000000 + | 40 | 50 | 50.0000000000000000 + | 50 | 60 | 60.0000000000000000 + | | 0 | 0.00000000000000000000 +(16 rows) + +drop table gstest; diff --git a/src/test/regress/sql/parallelgroupingsets.sql b/src/test/regress/sql/parallelgroupingsets.sql new file mode 100644 index 0000000..5a84938 --- /dev/null +++ b/src/test/regress/sql/parallelgroupingsets.sql @@ -0,0 +1,43 @@ +-- +-- grouping sets +-- + +-- test data sources +create table gstest(c1 int, c2 int, c3 int) with (parallel_workers = 4); + +insert into gstest select 1,10,100 from generate_series(1,10)i; +insert into gstest select 1,10,200 from generate_series(1,10)i; +insert into gstest select 1,20,30 from 
generate_series(1,10)i; +insert into gstest select 2,30,40 from generate_series(1,10)i; +insert into gstest select 2,40,50 from generate_series(1,10)i; +insert into gstest select 3,50,60 from generate_series(1,10)i; +insert into gstest select 1,NULL,0 from generate_series(1,10)i; +analyze gstest; + +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET max_parallel_workers_per_gather=4; + +-- test for hashagg +set enable_hashagg to on; +explain (costs off, verbose) +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)); +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)) order by 1,2,3; + +explain (costs off, verbose) +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)); +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)) order by 1,2,3,4; + + +-- test for groupagg +set enable_hashagg to off; +explain (costs off, verbose) +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)); +select c1, c2, avg(c3) from gstest group by grouping sets((c1,c2),(c1)) order by 1,2,3; + +explain (costs off, verbose) +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)); +select c1, c2, c3, avg(c3) from gstest group by grouping sets((c1,c2),(c1), (c2,c3)) order by 1,2,3,4; + + +drop table gstest; -- 2.7.4