From 066f0c0b60a738cb6766542b91c894905b672af2 Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Mon, 18 May 2020 14:28:31 +0200
Subject: [PATCH 3/3] Use also partial paths as the input for grouped paths.

---
 src/backend/optimizer/path/allpaths.c      |  46 +++++-
 src/backend/optimizer/util/relnode.c       |  46 +++---
 src/test/regress/expected/agg_pushdown.out | 157 +++++++++++++++++++++
 src/test/regress/sql/agg_pushdown.sql      |  66 +++++++++
 4 files changed, 285 insertions(+), 30 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 84a918dc58..e0b5418de4 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -128,7 +128,7 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
 								   RangeTblEntry *rte);
 static void add_grouped_path(PlannerInfo *root, RelOptInfo *rel,
 							 Path *subpath, AggStrategy aggstrategy,
-							 RelAggInfo *agg_info);
+							 RelAggInfo *agg_info, bool partial);
 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
 									  pushdown_safety_info *safetyInfo);
@@ -3021,6 +3021,7 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
 						RelOptInfo *rel_plain, RelAggInfo *agg_info)
 {
 	ListCell   *lc;
+	Path	   *path;
 
 	if (IS_DUMMY_REL(rel_plain))
 	{
@@ -3030,7 +3031,7 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
 
 	foreach(lc, rel_plain->pathlist)
 	{
-		Path	   *path = (Path *) lfirst(lc);
+		path = (Path *) lfirst(lc);
 
 		/*
 		 * Since the path originates from the non-grouped relation which is
@@ -3044,7 +3045,8 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
 		 * add_grouped_path() will check whether the path has suitable
 		 * pathkeys.
 		 */
-		add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info);
+		add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info,
+						 false);
 
 		/*
 		 * Repeated creation of hash table (for new parameter values) should
@@ -3052,12 +3054,38 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
 		 * efficiency.
 		 */
 		if (path->param_info == NULL)
-			add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info);
+			add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info,
+							 false);
 	}
 
 	/* Could not generate any grouped paths? */
 	if (rel_grouped->pathlist == NIL)
+	{
 		mark_dummy_rel(rel_grouped);
+		return;
+	}
+
+	/*
+	 * Almost the same for partial paths.
+	 *
+	 * The difference is that parameterized paths are never created, see
+	 * add_partial_path() for explanation.
+	 */
+	foreach(lc, rel_plain->partial_pathlist)
+	{
+		path = (Path *) lfirst(lc);
+
+		if (path->param_info != NULL)
+			continue;
+
+		path = (Path *) create_projection_path(root, rel_grouped, path,
+											   agg_info->agg_input);
+
+		add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info,
+						 true);
+		add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info,
+						 true);
+	}
 }
 
 /*
@@ -3065,7 +3093,8 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
  */
 static void
 add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
-				 AggStrategy aggstrategy, RelAggInfo *agg_info)
+				 AggStrategy aggstrategy, RelAggInfo *agg_info,
+				 bool partial)
 {
 	Path	   *agg_path;
 
@@ -3081,7 +3110,12 @@ add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 
 	/* Add the grouped path to the list of grouped base paths. */
 	if (agg_path != NULL)
-		add_path(rel, (Path *) agg_path);
+	{
+		if (!partial)
+			add_path(rel, (Path *) agg_path);
+		else
+			add_partial_path(rel, (Path *) agg_path);
+	}
 }
 
 /*
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 7460794b46..4164f4b0e4 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -958,33 +958,12 @@ build_join_rel(PlannerInfo *root,
 		build_joinrel_partition_info(joinrel, outer_rel, inner_rel,
 									 restrictlist, sjinfo->jointype);
 
+	/*
+	 * Set estimates of the joinrel's size.
+	 */
 	if (!grouped)
-	{
-		/*
-		 * Set estimates of the joinrel's size.
-		 */
 		set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
 								   sjinfo, restrictlist);
-
-		/*
-		 * Set the consider_parallel flag if this joinrel could potentially be
-		 * scanned within a parallel worker.  If this flag is false for either
-		 * inner_rel or outer_rel, then it must be false for the joinrel also.
-		 * Even if both are true, there might be parallel-restricted
-		 * expressions in the targetlist or quals.
-		 *
-		 * Note that if there are more than two rels in this relation, they
-		 * could be divided between inner_rel and outer_rel in any arbitrary
-		 * way.  We assume this doesn't matter, because we should hit all the
-		 * same baserels and joinclauses while building up to this joinrel no
-		 * matter which we take; therefore, we should make the same decision
-		 * here however we get here.
-		 */
-		if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
-			is_parallel_safe(root, (Node *) restrictlist) &&
-			is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
-			joinrel->consider_parallel = true;
-	}
 	else
 	{
 		/*
@@ -1000,6 +979,25 @@ build_join_rel(PlannerInfo *root,
 											agg_info->input_rows, NULL);
 	}
 
+	/*
+	 * Set the consider_parallel flag if this joinrel could potentially be
+	 * scanned within a parallel worker.  If this flag is false for either
+	 * inner_rel or outer_rel, then it must be false for the joinrel also.
+	 * Even if both are true, there might be parallel-restricted expressions
+	 * in the targetlist or quals.
+	 *
+	 * Note that if there are more than two rels in this relation, they could
+	 * be divided between inner_rel and outer_rel in any arbitrary way.  We
+	 * assume this doesn't matter, because we should hit all the same baserels
+	 * and joinclauses while building up to this joinrel no matter which we
+	 * take; therefore, we should make the same decision here however we get
+	 * here.
+	 */
+	if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
+		is_parallel_safe(root, (Node *) restrictlist) &&
+		is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
+		joinrel->consider_parallel = true;
+
 	/* Add the joinrel to the PlannerInfo. */
 	if (!grouped)
 		add_join_rel(root, joinrel);
diff --git a/src/test/regress/expected/agg_pushdown.out b/src/test/regress/expected/agg_pushdown.out
index 413abc147e..66d36d122e 100644
--- a/src/test/regress/expected/agg_pushdown.out
+++ b/src/test/regress/expected/agg_pushdown.out
@@ -91,6 +91,7 @@ AS c1 ON c1.parent = p.i GROUP BY p.i;
                      ->  Seq Scan on agg_pushdown_child1 c1
 (12 rows)
 
+-- Restore the default values.
 SET enable_nestloop TO on;
 SET enable_hashjoin TO on;
 -- Scan index on agg_pushdown_child1(parent) column and aggregate the result
@@ -213,3 +214,159 @@ c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
          ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
 (13 rows)
 
+-- Most of the tests above with parallel query processing enforced.
+SET min_parallel_index_scan_size = 0;
+SET min_parallel_table_scan_size = 0;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+-- Partially aggregate a single relation.
+--
+-- Nestloop join.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+EXPLAIN (COSTS off)
+SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Nested Loop
+               ->  Partial GroupAggregate
+                     Group Key: c1.parent
+                     ->  Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+               ->  Index Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+                     Index Cond: (i = c1.parent)
+(10 rows)
+
+-- Hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                                                    QUERY PLAN                                                    
+------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Sort
+         Sort Key: p.i
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Hash Join
+                     Hash Cond: (c1.parent = p.i)
+                     ->  Partial GroupAggregate
+                           Group Key: c1.parent
+                           ->  Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+                     ->  Parallel Hash
+                           ->  Parallel Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(13 rows)
+
+-- Merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Merge Join
+               Merge Cond: (c1.parent = p.i)
+               ->  Partial GroupAggregate
+                     Group Key: c1.parent
+                     ->  Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(10 rows)
+
+SET enable_nestloop TO on;
+SET enable_hashjoin TO on;
+-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2
+-- and aggregate the result.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                                    QUERY PLAN                                                    
+------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Sort
+               Sort Key: p.i
+               ->  Nested Loop
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Nested Loop
+                                 ->  Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                                 ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+                                       Index Cond: ((k = c1.j) AND (parent = c1.parent))
+                     ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+                           Index Cond: (i = c1.parent)
+(15 rows)
+
+-- The same for hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                                       QUERY PLAN                                                       
+------------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 1
+         ->  Sort
+               Sort Key: p.i
+               ->  Parallel Hash Join
+                     Hash Cond: (c1.parent = p.i)
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Parallel Hash Join
+                                 Hash Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k))
+                                 ->  Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+                                 ->  Parallel Hash
+                                       ->  Parallel Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+                     ->  Parallel Hash
+                           ->  Parallel Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(17 rows)
+
+-- The same for merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                                    QUERY PLAN                                                    
+------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Merge Join
+               Merge Cond: (c1.parent = p.i)
+               ->  Sort
+                     Sort Key: c1.parent
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Merge Join
+                                 Merge Cond: ((c1.j = c2.k) AND (c1.parent = c2.parent))
+                                 ->  Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                                 ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(15 rows)
+
diff --git a/src/test/regress/sql/agg_pushdown.sql b/src/test/regress/sql/agg_pushdown.sql
index 6bbbc7a8a1..49ba6dd67c 100644
--- a/src/test/regress/sql/agg_pushdown.sql
+++ b/src/test/regress/sql/agg_pushdown.sql
@@ -61,6 +61,7 @@ EXPLAIN (COSTS off)
 SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
 AS c1 ON c1.parent = p.i GROUP BY p.i;
 
+-- Restore the default values.
 SET enable_nestloop TO on;
 SET enable_hashjoin TO on;
 
@@ -112,3 +113,68 @@ EXPLAIN (COSTS off)
 SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
 agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
 c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- Most of the tests above with parallel query processing enforced.
+SET min_parallel_index_scan_size = 0;
+SET min_parallel_table_scan_size = 0;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+
+-- Partially aggregate a single relation.
+--
+-- Nestloop join.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+EXPLAIN (COSTS off)
+SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+-- Hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+-- Merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+SET enable_nestloop TO on;
+SET enable_hashjoin TO on;
+
+-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2
+-- and aggregate the result.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- The same for hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- The same for merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
-- 
2.20.1

