diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index e284fd71d7..fa71cfb010 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -798,6 +798,45 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				break;
 			}
 
+		case T_GroupedVar:
+
+			/*
+			 * If a GroupedVar appears in the targetlist of an Agg node, it
+			 * can represent either an Aggref or a grouping expression.
+			 *
+			 * TODO Consider doing this expansion earlier, e.g. in setrefs.c.
+			 */
+			if (state->parent && (IsA(state->parent, AggState)))
+			{
+				GroupedVar *gvar = (GroupedVar *) node;
+
+				if (IsA(gvar->gvexpr, Aggref))
+				{
+					if (gvar->agg_partial)
+						ExecInitExprRec((Expr *) gvar->agg_partial, state,
+										resv, resnull);
+					else
+						ExecInitExprRec((Expr *) gvar->gvexpr, state,
+										resv, resnull);
+				}
+				else
+					ExecInitExprRec((Expr *) gvar->gvexpr, state,
+									resv, resnull);
+				break;
+			}
+			else
+			{
+				/*
+				 * set_plan_refs should have replaced GroupedVar in the
+				 * targetlist with an ordinary Var.
+				 *
+				 * XXX Should we error out here? There's at least one legal
+				 * case here which we'd have to check: a Result plan with no
+				 * outer plan which represents an empty Append plan.
+				 */
+				break;
+			}
+
 		case T_GroupingFunc:
 			{
 				GroupingFunc *grp_node = (GroupingFunc *) node;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 1c12075b01..c03e7592b3 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1421,6 +1421,7 @@ _copyAggref(const Aggref *from)
 	COPY_SCALAR_FIELD(aggcollid);
 	COPY_SCALAR_FIELD(inputcollid);
 	COPY_SCALAR_FIELD(aggtranstype);
+	COPY_SCALAR_FIELD(aggcombinefn);
 	COPY_NODE_FIELD(aggargtypes);
 	COPY_NODE_FIELD(aggdirectargs);
 	COPY_NODE_FIELD(args);
@@ -2273,6 +2274,23 @@ _copyPlaceHolderVar(const PlaceHolderVar *from)
 }
 
 /*
+ * _copyGroupedVar
+ */
+static GroupedVar *
+_copyGroupedVar(const GroupedVar *from)
+{
+	GroupedVar *newnode = makeNode(GroupedVar);
+
+	COPY_NODE_FIELD(gvexpr);
+	COPY_NODE_FIELD(agg_partial);
+	COPY_SCALAR_FIELD(sortgroupref);
+	COPY_SCALAR_FIELD(gvid);
+	COPY_SCALAR_FIELD(width);
+
+	return newnode;
+}
+
+/*
  * _copySpecialJoinInfo
  */
 static SpecialJoinInfo *
@@ -2331,6 +2349,21 @@ _copyPlaceHolderInfo(const PlaceHolderInfo *from)
 	return newnode;
 }
 
+static GroupedVarInfo *
+_copyGroupedVarInfo(const GroupedVarInfo *from)
+{
+	GroupedVarInfo *newnode = makeNode(GroupedVarInfo);
+
+	COPY_SCALAR_FIELD(gvid);
+	COPY_NODE_FIELD(gvexpr);
+	COPY_NODE_FIELD(agg_partial);
+	COPY_SCALAR_FIELD(sortgroupref);
+	COPY_SCALAR_FIELD(gv_eval_at);
+	COPY_SCALAR_FIELD(derived);
+
+	return newnode;
+}
+
 /* ****************************************************************
  *					parsenodes.h copy functions
  * ****************************************************************
@@ -5086,6 +5119,9 @@ copyObjectImpl(const void *from)
 		case T_PlaceHolderVar:
 			retval = _copyPlaceHolderVar(from);
 			break;
+		case T_GroupedVar:
+			retval = _copyGroupedVar(from);
+			break;
 		case T_SpecialJoinInfo:
 			retval = _copySpecialJoinInfo(from);
 			break;
@@ -5095,6 +5131,9 @@ copyObjectImpl(const void *from)
 		case T_PlaceHolderInfo:
 			retval = _copyPlaceHolderInfo(from);
 			break;
+		case T_GroupedVarInfo:
+			retval = _copyGroupedVarInfo(from);
+			break;
 
 			/*
 			 * VALUE NODES
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 6a971d0141..8cd4051e74 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -873,6 +873,14 @@ _equalPlaceHolderVar(const PlaceHolderVar *a, const PlaceHolderVar *b)
 }
 
 static bool
+_equalGroupedVar(const GroupedVar *a, const GroupedVar *b)
+{
+	COMPARE_SCALAR_FIELD(gvid);
+
+	return true;
+}
+
+static bool
 _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
 {
 	COMPARE_BITMAPSET_FIELD(min_lefthand);
@@ -3173,6 +3181,9 @@ equal(const void *a, const void *b)
 		case T_PlaceHolderVar:
 			retval = _equalPlaceHolderVar(a, b);
 			break;
+		case T_GroupedVar:
+			retval = _equalGroupedVar(a, b);
+			break;
 		case T_SpecialJoinInfo:
 			retval = _equalSpecialJoinInfo(a, b);
 			break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index a10014f755..8ea1f212a8 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -259,6 +259,17 @@ exprType(const Node *expr)
 		case T_PlaceHolderVar:
 			type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr);
 			break;
+		case T_GroupedVar:
+			if (IsA(((const GroupedVar *) expr)->gvexpr, Aggref))
+			{
+				if (((const GroupedVar *) expr)->agg_partial)
+					type = exprType((Node *) ((const GroupedVar *) expr)->agg_partial);
+				else
+					type = exprType((Node *) ((const GroupedVar *) expr)->gvexpr);
+			}
+			else
+				type = exprType((Node *) ((const GroupedVar *) expr)->gvexpr);
+			break;
 		default:
 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr));
 			type = InvalidOid;	/* keep compiler quiet */
@@ -492,6 +503,16 @@ exprTypmod(const Node *expr)
 			return ((const SetToDefault *) expr)->typeMod;
 		case T_PlaceHolderVar:
 			return exprTypmod((Node *) ((const PlaceHolderVar *) expr)->phexpr);
+		case T_GroupedVar:
+			if (IsA(((const GroupedVar *) expr)->gvexpr, Aggref))
+			{
+				if (((const GroupedVar *) expr)->agg_partial)
+					return exprTypmod((Node *) ((const GroupedVar *) expr)->agg_partial);
+				else
+					return exprTypmod((Node *) ((const GroupedVar *) expr)->gvexpr);
+			}
+			else
+				return exprTypmod((Node *) ((const GroupedVar *) expr)->gvexpr);
 		default:
 			break;
 	}
@@ -903,6 +924,12 @@ exprCollation(const Node *expr)
 		case T_PlaceHolderVar:
 			coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr);
 			break;
+		case T_GroupedVar:
+			if (IsA(((const GroupedVar *) expr)->gvexpr, Aggref))
+				coll = exprCollation((Node *) ((const GroupedVar *) expr)->agg_partial);
+			else
+				coll = exprCollation((Node *) ((const GroupedVar *) expr)->gvexpr);
+			break;
 		default:
 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr));
 			coll = InvalidOid;	/* keep compiler quiet */
@@ -2187,6 +2214,8 @@ expression_tree_walker(Node *node,
 			break;
 		case T_PlaceHolderVar:
 			return walker(((PlaceHolderVar *) node)->phexpr, context);
+		case T_GroupedVar:
+			return walker(((GroupedVar *) node)->gvexpr, context);
 		case T_InferenceElem:
 			return walker(((InferenceElem *) node)->expr, context);
 		case T_AppendRelInfo:
@@ -2993,6 +3022,16 @@ expression_tree_mutator(Node *node,
 				return (Node *) newnode;
 			}
 			break;
+		case T_GroupedVar:
+			{
+				GroupedVar *gv = (GroupedVar *) node;
+				GroupedVar *newnode;
+
+				FLATCOPY(newnode, gv, GroupedVar);
+				MUTATE(newnode->gvexpr, gv->gvexpr, Expr *);
+				MUTATE(newnode->agg_partial, gv->agg_partial, Aggref *);
+				return (Node *) newnode;
+			}
 		case T_InferenceElem:
 			{
 				InferenceElem *inferenceelemdexpr = (InferenceElem *) node;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 979d523e00..7a8e52614d 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1196,6 +1196,7 @@ _outAggref(StringInfo str, const Aggref *node)
 	WRITE_OID_FIELD(aggcollid);
 	WRITE_OID_FIELD(inputcollid);
 	WRITE_OID_FIELD(aggtranstype);
+	WRITE_OID_FIELD(aggcombinefn);
 	WRITE_NODE_FIELD(aggargtypes);
 	WRITE_NODE_FIELD(aggdirectargs);
 	WRITE_NODE_FIELD(args);
@@ -2287,6 +2288,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
 	WRITE_NODE_FIELD(append_rel_list);
 	WRITE_NODE_FIELD(rowMarks);
 	WRITE_NODE_FIELD(placeholder_list);
+	WRITE_NODE_FIELD(grouped_var_list);
 	WRITE_NODE_FIELD(fkey_list);
 	WRITE_NODE_FIELD(query_pathkeys);
 	WRITE_NODE_FIELD(group_pathkeys);
@@ -2294,6 +2296,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
 	WRITE_NODE_FIELD(distinct_pathkeys);
 	WRITE_NODE_FIELD(sort_pathkeys);
 	WRITE_NODE_FIELD(processed_tlist);
+	WRITE_INT_FIELD(max_sortgroupref);
 	WRITE_NODE_FIELD(minmax_aggs);
 	WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
 	WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
@@ -2334,6 +2337,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
 	WRITE_NODE_FIELD(cheapest_parameterized_paths);
 	WRITE_BITMAPSET_FIELD(direct_lateral_relids);
 	WRITE_BITMAPSET_FIELD(lateral_relids);
+	WRITE_NODE_FIELD(agg_info);
 	WRITE_UINT_FIELD(relid);
 	WRITE_OID_FIELD(reltablespace);
 	WRITE_ENUM_FIELD(rtekind, RTEKind);
@@ -2511,6 +2515,20 @@ _outParamPathInfo(StringInfo str, const ParamPathInfo *node)
 }
 
 static void
+_outRelAggInfo(StringInfo str, const RelAggInfo *node)
+{
+	WRITE_NODE_TYPE("RELAGGINFO");
+
+	WRITE_NODE_FIELD(target_simple);
+	WRITE_NODE_FIELD(target_partial);
+	WRITE_NODE_FIELD(input);
+	WRITE_NODE_FIELD(group_clauses);
+	WRITE_NODE_FIELD(group_exprs);
+	WRITE_NODE_FIELD(agg_exprs_simple);
+	WRITE_NODE_FIELD(agg_exprs_partial);
+}
+
+static void
 _outRestrictInfo(StringInfo str, const RestrictInfo *node)
 {
 	WRITE_NODE_TYPE("RESTRICTINFO");
@@ -2554,6 +2572,18 @@ _outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
 }
 
 static void
+_outGroupedVar(StringInfo str, const GroupedVar *node)
+{
+	WRITE_NODE_TYPE("GROUPEDVAR");
+
+	WRITE_NODE_FIELD(gvexpr);
+	WRITE_NODE_FIELD(agg_partial);
+	WRITE_UINT_FIELD(sortgroupref);
+	WRITE_UINT_FIELD(gvid);
+	WRITE_INT_FIELD(width);
+}
+
+static void
 _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
 {
 	WRITE_NODE_TYPE("SPECIALJOININFO");
@@ -2598,6 +2628,19 @@ _outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
 }
 
 static void
+_outGroupedVarInfo(StringInfo str, const GroupedVarInfo *node)
+{
+	WRITE_NODE_TYPE("GROUPEDVARINFO");
+
+	WRITE_UINT_FIELD(gvid);
+	WRITE_NODE_FIELD(gvexpr);
+	WRITE_NODE_FIELD(agg_partial);
+	WRITE_UINT_FIELD(sortgroupref);
+	WRITE_BITMAPSET_FIELD(gv_eval_at);
+	WRITE_BOOL_FIELD(derived);
+}
+
+static void
 _outMinMaxAggInfo(StringInfo str, const MinMaxAggInfo *node)
 {
 	WRITE_NODE_TYPE("MINMAXAGGINFO");
@@ -4121,12 +4164,18 @@ outNode(StringInfo str, const void *obj)
 			case T_ParamPathInfo:
 				_outParamPathInfo(str, obj);
 				break;
+			case T_RelAggInfo:
+				_outRelAggInfo(str, obj);
+				break;
 			case T_RestrictInfo:
 				_outRestrictInfo(str, obj);
 				break;
 			case T_PlaceHolderVar:
 				_outPlaceHolderVar(str, obj);
 				break;
+			case T_GroupedVar:
+				_outGroupedVar(str, obj);
+				break;
 			case T_SpecialJoinInfo:
 				_outSpecialJoinInfo(str, obj);
 				break;
@@ -4136,6 +4185,9 @@ outNode(StringInfo str, const void *obj)
 			case T_PlaceHolderInfo:
 				_outPlaceHolderInfo(str, obj);
 				break;
+			case T_GroupedVarInfo:
+				_outGroupedVarInfo(str, obj);
+				break;
 			case T_MinMaxAggInfo:
 				_outMinMaxAggInfo(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 42aff7f57a..9a54f768c3 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -534,6 +534,23 @@ _readVar(void)
 }
 
 /*
+ * _readGroupedVar
+ */
+static GroupedVar *
+_readGroupedVar(void)
+{
+	READ_LOCALS(GroupedVar);
+
+	READ_NODE_FIELD(gvexpr);
+	READ_NODE_FIELD(agg_partial);
+	READ_UINT_FIELD(sortgroupref);
+	READ_UINT_FIELD(gvid);
+	READ_INT_FIELD(width);
+
+	READ_DONE();
+}
+
+/*
  * _readConst
  */
 static Const *
@@ -589,6 +606,7 @@ _readAggref(void)
 	READ_OID_FIELD(aggcollid);
 	READ_OID_FIELD(inputcollid);
 	READ_OID_FIELD(aggtranstype);
+	READ_OID_FIELD(aggcombinefn);
 	READ_NODE_FIELD(aggargtypes);
 	READ_NODE_FIELD(aggdirectargs);
 	READ_NODE_FIELD(args);
@@ -2535,6 +2553,8 @@ parseNodeString(void)
 		return_value = _readTableFunc();
 	else if (MATCH("VAR", 3))
 		return_value = _readVar();
+	else if (MATCH("GROUPEDVAR", 10))
+		return_value = _readGroupedVar();
 	else if (MATCH("CONST", 5))
 		return_value = _readConst();
 	else if (MATCH("PARAM", 5))
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 3ef7d7d8aa..30b5b6ad63 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -266,7 +266,8 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
 			if (joinrel)
 			{
 				/* Create paths for partitionwise joins. */
-				generate_partitionwise_join_paths(root, joinrel);
+				generate_partitionwise_join_paths(root, joinrel,
+												  REL_AGG_KIND_NONE);
 
 				/*
 				 * Except for the topmost scan/join rel, consider gathering
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 3ada379f8b..70a97689b5 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -58,6 +58,7 @@ typedef struct pushdown_safety_info
 
 /* These parameters are set by GUC */
 bool		enable_geqo = false;	/* just in case GUC doesn't set it */
+bool		enable_agg_pushdown;
 int			geqo_threshold;
 int			min_parallel_table_scan_size;
 int			min_parallel_index_scan_size;
@@ -73,16 +74,18 @@ static void set_base_rel_consider_startup(PlannerInfo *root);
 static void set_base_rel_sizes(PlannerInfo *root);
 static void set_base_rel_pathlists(PlannerInfo *root);
 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
-			 Index rti, RangeTblEntry *rte);
+			 Index rti, RangeTblEntry *rte, RelAggKind agg_kind);
 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-				 Index rti, RangeTblEntry *rte);
+				 Index rti, RangeTblEntry *rte,
+				 RelAggKind agg_kind);
 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
 				   RangeTblEntry *rte);
-static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
+static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel,
+						   RelAggKind agg_kind);
 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
 						  RangeTblEntry *rte);
 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-					   RangeTblEntry *rte);
+					   RangeTblEntry *rte, RelAggKind agg_kind);
 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
 						 RangeTblEntry *rte);
 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -92,16 +95,19 @@ static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					 RangeTblEntry *rte);
 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
-					Index rti, RangeTblEntry *rte);
+					Index rti, RangeTblEntry *rte,
+					RelAggKind agg_kind);
 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-						Index rti, RangeTblEntry *rte);
+						Index rti, RangeTblEntry *rte,
+						RelAggKind agg_kind);
 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 						   List *live_childrels,
 						   List *all_child_pathkeys,
 						   List *partitioned_rels);
 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
 									  RelOptInfo *rel,
-									  Relids required_outer);
+									  Relids required_outer,
+									  RelAggKind agg_kind);
 static void accumulate_append_subpath(Path *path,
 						  List **subpaths, List **special_subpaths);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -118,7 +124,8 @@ static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
 							 RangeTblEntry *rte);
 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					   RangeTblEntry *rte);
-static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
+static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root,
+					   List *joinlist);
 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
 						  pushdown_safety_info *safetyInfo);
 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
@@ -140,7 +147,8 @@ static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
 /*
  * make_one_rel
  *	  Finds all possible access paths for executing a query, returning a
- *	  single rel that represents the join of all base rels in the query.
+ *	  single rel that represents the join of all base rels in the query. If
+ *	  possible, also return a join that contains partial aggregate(s).
  */
 RelOptInfo *
 make_one_rel(PlannerInfo *root, List *joinlist)
@@ -169,12 +177,16 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 		root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
 	}
 
-	/* Mark base rels as to whether we care about fast-start plans */
+	/*
+	 * Mark base rels as to whether we care about fast-start plans. XXX We
+	 * deliberately do not mark grouped rels --- see the comment on
+	 * consider_startup in build_simple_rel().
+	 */
 	set_base_rel_consider_startup(root);
 
 	/*
-	 * Compute size estimates and consider_parallel flags for each base rel,
-	 * then generate access paths.
+	 * Compute size estimates and consider_parallel flags for each plain and
+	 * each grouped base rel, then generate access paths.
 	 */
 	set_base_rel_sizes(root);
 	set_base_rel_pathlists(root);
@@ -231,6 +243,21 @@ set_base_rel_consider_startup(PlannerInfo *root)
 			RelOptInfo *rel = find_base_rel(root, varno);
 
 			rel->consider_param_startup = true;
+
+			if (rel->grouped)
+			{
+				/*
+				 * As for grouped relations, paths differ substantially by the
+				 * AggStrategy. Paths that use AGG_HASHED should not be
+				 * parameterized (because creation of hashtable would have to
+				 * be repeated for different parameters) but paths using
+				 * AGG_SORTED can be. The latter seems to justify considering
+				 * the startup cost for grouped relation in general.
+				 */
+				rel->grouped->needs_final_agg->consider_param_startup = true;
+				if (rel->grouped->no_final_agg)
+					rel->grouped->no_final_agg->consider_param_startup = true;
+			}
 		}
 	}
 }
@@ -253,6 +280,7 @@ set_base_rel_sizes(PlannerInfo *root)
 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
 	{
 		RelOptInfo *rel = root->simple_rel_array[rti];
+		RelOptGrouped *rels_grouped;
 		RangeTblEntry *rte;
 
 		/* there may be empty slots corresponding to non-baserel RTEs */
@@ -266,6 +294,7 @@ set_base_rel_sizes(PlannerInfo *root)
 			continue;
 
 		rte = root->simple_rte_array[rti];
+		rels_grouped = rel->grouped;
 
 		/*
 		 * If parallelism is allowable for this query in general, see whether
@@ -276,9 +305,31 @@ set_base_rel_sizes(PlannerInfo *root)
 		 * goes ahead and makes paths immediately.
 		 */
 		if (root->glob->parallelModeOK)
+		{
 			set_rel_consider_parallel(root, rel, rte);
 
-		set_rel_size(root, rel, rti, rte);
+			/*
+			 * The grouped rel should not need this field (the owning plain
+			 * relation controls whether the aggregation takes place in a
+			 * parallel worker) but let's set it for consistency.
+			 *
+			 * TODO Either do the same for no_final_agg or remove this setting
+			 * altogether.
+			 */
+			if (rels_grouped)
+				rels_grouped->needs_final_agg->consider_parallel =
+					rel->consider_parallel;
+		}
+
+		set_rel_size(root, rel, rti, rte, REL_AGG_KIND_NONE);
+		if (rels_grouped)
+		{
+			set_rel_size(root, rels_grouped->needs_final_agg, rti, rte,
+						 REL_AGG_KIND_PARTIAL);
+			if (rels_grouped->no_final_agg)
+				set_rel_size(root, rels_grouped->no_final_agg, rti, rte,
+							 REL_AGG_KIND_SIMPLE);
+		}
 	}
 }
 
@@ -297,7 +348,9 @@ set_base_rel_pathlists(PlannerInfo *root)
 	{
 		RelOptInfo *rel = root->simple_rel_array[rti];
 
-		/* there may be empty slots corresponding to non-baserel RTEs */
+		/*
+		 * there may be empty slots corresponding to non-baserel RTEs
+		 */
 		if (rel == NULL)
 			continue;
 
@@ -307,7 +360,31 @@ set_base_rel_pathlists(PlannerInfo *root)
 		if (rel->reloptkind != RELOPT_BASEREL)
 			continue;
 
-		set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
+		set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti],
+						 REL_AGG_KIND_NONE);
+
+		/*
+		 * Create grouped paths for grouped relation if it exists.
+		 */
+		if (rel->grouped)
+		{
+			Assert(rel->grouped->needs_final_agg->agg_info != NULL);
+			Assert(rel->grouped->needs_final_agg->grouped == NULL);
+
+			set_rel_pathlist(root, rel, rti,
+							 root->simple_rte_array[rti],
+							 REL_AGG_KIND_PARTIAL);
+
+			if (rel->grouped->no_final_agg)
+			{
+				Assert(rel->grouped->no_final_agg->agg_info != NULL);
+				Assert(rel->grouped->no_final_agg->grouped == NULL);
+
+				set_rel_pathlist(root, rel, rti,
+								 root->simple_rte_array[rti],
+								 REL_AGG_KIND_SIMPLE);
+			}
+		}
 	}
 }
 
@@ -317,8 +394,16 @@ set_base_rel_pathlists(PlannerInfo *root)
  */
 static void
 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
-			 Index rti, RangeTblEntry *rte)
+			 Index rti, RangeTblEntry *rte, RelAggKind agg_kind)
 {
+	bool		grouped = rel->agg_info != NULL;
+
+	/*
+	 * build_simple_rel() should not have created rels that do not match this
+	 * condition.
+	 */
+	Assert(!grouped || rte->rtekind == RTE_RELATION);
+
 	if (rel->reloptkind == RELOPT_BASEREL &&
 		relation_excluded_by_constraints(root, rel, rte))
 	{
@@ -338,7 +423,7 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 	else if (rte->inh)
 	{
 		/* It's an "append relation", process accordingly */
-		set_append_rel_size(root, rel, rti, rte);
+		set_append_rel_size(root, rel, rti, rte, agg_kind);
 	}
 	else
 	{
@@ -348,6 +433,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
 				{
 					/* Foreign table */
+					/* Not supported yet, see build_simple_rel(). */
+					Assert(!grouped);
 					set_foreign_size(root, rel, rte);
 				}
 				else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
@@ -361,6 +448,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 				else if (rte->tablesample != NULL)
 				{
 					/* Sampled relation */
+					/* Not supported yet, see build_simple_rel(). */
+					Assert(!grouped);
 					set_tablesample_rel_size(root, rel, rte);
 				}
 				else
@@ -420,8 +509,16 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
  */
 static void
 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-				 Index rti, RangeTblEntry *rte)
+				 Index rti, RangeTblEntry *rte, RelAggKind agg_kind)
 {
+	bool		grouped = rel->agg_info != NULL;
+
+	/*
+	 * add_grouped_base_rels_to_query() should not have created rels that do
+	 * not match this condition.
+	 */
+	Assert(!grouped || rte->rtekind == RTE_RELATION);
+
 	if (IS_DUMMY_REL(rel))
 	{
 		/* We already proved the relation empty, so nothing more to do */
@@ -429,7 +526,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	else if (rte->inh)
 	{
 		/* It's an "append relation", process accordingly */
-		set_append_rel_pathlist(root, rel, rti, rte);
+		set_append_rel_pathlist(root, rel, rti, rte, agg_kind);
 	}
 	else
 	{
@@ -439,17 +536,21 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 				if (rte->relkind == RELKIND_FOREIGN_TABLE)
 				{
 					/* Foreign table */
+					/* Not supported yet, see build_simple_rel(). */
+					Assert(!grouped);
 					set_foreign_pathlist(root, rel, rte);
 				}
 				else if (rte->tablesample != NULL)
 				{
 					/* Sampled relation */
+					/* Not supported yet, see build_simple_rel(). */
+					Assert(!grouped);
 					set_tablesample_rel_pathlist(root, rel, rte);
 				}
 				else
 				{
 					/* Plain relation */
-					set_plain_rel_pathlist(root, rel, rte);
+					set_plain_rel_pathlist(root, rel, rte, agg_kind);
 				}
 				break;
 			case RTE_SUBQUERY:
@@ -479,6 +580,11 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		}
 	}
 
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		rel = rel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		rel = rel->grouped->no_final_agg;
+
 	/*
 	 * If this is a baserel, we should normally consider gathering any partial
 	 * paths we may have created for it.
@@ -692,9 +798,17 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
  *	  Build access paths for a plain relation (no subquery, no inheritance)
  */
 static void
-set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte,
+					   RelAggKind agg_kind)
 {
 	Relids		required_outer;
+	Path	   *seq_path;
+	RelOptInfo *rel_plain = rel;
+
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		rel = rel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		rel = rel->grouped->no_final_agg;
 
 	/*
 	 * We don't support pushing join clauses into the quals of a seqscan, but
@@ -703,18 +817,37 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	required_outer = rel->lateral_relids;
 
-	/* Consider sequential scan */
-	add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
+	/* Consider sequential scan, both plain and grouped. */
+	seq_path = create_seqscan_path(root, rel, required_outer, 0);
 
-	/* If appropriate, consider parallel sequential scan */
+	/*
+	 * It's probably not a good idea to repeat hashed aggregation with
+	 * different parameters, so check that there are no parameters.
+	 */
+	if (agg_kind == REL_AGG_KIND_NONE)
+		add_path(rel, seq_path);
+	else if (required_outer == NULL)
+	{
+		/*
+		 * Only AGG_HASHED is suitable here as it does not expect the input
+		 * set to be sorted.
+		 */
+		create_grouped_path(root, rel, seq_path, false, false, AGG_HASHED,
+							agg_kind);
+	}
+
+	/* If appropriate, consider parallel sequential scan (plain or grouped) */
 	if (rel->consider_parallel && required_outer == NULL)
-		create_plain_partial_paths(root, rel);
+		create_plain_partial_paths(root, rel_plain, agg_kind);
 
-	/* Consider index scans */
-	create_index_paths(root, rel);
+	/*
+	 * Consider index scans.
+	 */
+	create_index_paths(root, rel, agg_kind);
 
 	/* Consider TID scans */
-	create_tidscan_paths(root, rel);
+	/* TODO Regression test for these paths. */
+	create_tidscan_paths(root, rel, agg_kind);
 }
 
 /*
@@ -722,19 +855,143 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  *	  Build partial access paths for parallel scan of a plain relation
  */
 static void
-create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
+create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel,
+						   RelAggKind agg_kind)
 {
 	int			parallel_workers;
+	Path	   *path;
 
-	parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
-											   max_parallel_workers_per_gather);
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		rel = rel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		rel = rel->grouped->no_final_agg;
+
+	/*
+	 * See the no_final_agg field of RelOptGrouped for an explanation.
+	 */
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+		return;
+
+	parallel_workers = compute_parallel_worker(rel, rel->pages, -1, max_parallel_workers_per_gather);
 
 	/* If any limit was set to zero, the user doesn't want a parallel scan. */
 	if (parallel_workers <= 0)
 		return;
 
 	/* Add an unordered partial path based on a parallel sequential scan. */
-	add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
+	path = create_seqscan_path(root, rel, NULL, parallel_workers);
+
+	if (agg_kind == REL_AGG_KIND_NONE)
+		add_partial_path(rel, path);
+	else
+	{
+		/*
+		 * Do partial aggregation at base relation level if the relation is
+		 * eligible for it. Only AGG_HASHED is suitable here as it does not
+		 * expect the input set to be sorted.
+		 */
+		create_grouped_path(root, rel, path, false, true, AGG_HASHED,
+							agg_kind);
+	}
+}
+
+/*
+ * Apply aggregation to a subpath and add the AggPath to the pathlist.
+ *
+ * "precheck" tells whether the aggregation path should first be checked using
+ * add_path_precheck() / add_partial_path_precheck().
+ *
+ * If "partial" is true, the aggregation path is considered partial in terms
+ * of parallel execution.
+ *
+ * "agg_kind" tells whether the aggregation should be partial (in terms of
+ * 2-stage aggregation) or simple (i.e. 1-stage aggregation).
+ *
+ * Caution: Since only a grouped relation makes sense as an input for this
+ * function, "rel" is the grouped relation even though "agg_kind" is passed
+ * too. This is different from other functions that receive "agg_kind" and use
+ * it to fetch the grouped relation themselves.
+ *
+ * The return value tells whether the path was added to the pathlist.
+ */
+bool
+create_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
+					bool precheck, bool partial, AggStrategy aggstrategy,
+					RelAggKind agg_kind)
+{
+	Path	   *agg_path;
+	RelAggInfo *agg_info = rel->agg_info;
+
+	Assert(agg_kind != REL_AGG_KIND_NONE);
+	Assert(agg_info != NULL);
+
+	/*
+	 * REL_AGG_KIND_SIMPLE causes finalization of aggregates. We can only
+	 * support parallel paths if each worker produced a distinct set of
+	 * grouping keys, but such a special case is not known. So this list
+	 * should be empty.
+	 */
+	if (agg_kind == REL_AGG_KIND_SIMPLE && partial)
+		return false;
+
+	/*
+	 * If the AggPath should be partial, the subpath must be too, and
+	 * therefore the subpath is essentially parallel_safe.
+	 */
+	Assert(subpath->parallel_safe || !partial);
+
+	/*
+	 * Repeated creation of a hash table does not sound like a good idea.
+	 * should avoid asking us to do so.
+	 */
+	Assert(subpath->param_info == NULL || aggstrategy != AGG_HASHED);
+
+	/*
+	 * Note that "partial" in the following function names refers to 2-stage
+	 * aggregation, not to parallel processing.
+	 */
+	if (aggstrategy == AGG_HASHED)
+		agg_path = (Path *) create_agg_hashed_path(root, subpath,
+												   subpath->rows,
+												   agg_kind);
+	else if (aggstrategy == AGG_SORTED)
+		agg_path = (Path *) create_agg_sorted_path(root, subpath,
+												   true,
+												   subpath->rows,
+												   agg_kind);
+	else
+		elog(ERROR, "unexpected strategy %d", aggstrategy);
+
+	/* Add the grouped path to the list of grouped base paths. */
+	if (agg_path != NULL)
+	{
+		if (precheck)
+		{
+			List	   *pathkeys;
+
+			/* AGG_HASHED is not supposed to generate sorted output. */
+			pathkeys = aggstrategy == AGG_SORTED ? subpath->pathkeys : NIL;
+
+			if (!partial &&
+				!add_path_precheck(rel, agg_path->startup_cost,
+								   agg_path->total_cost, pathkeys, NULL))
+				return false;
+
+			if (partial &&
+				!add_partial_path_precheck(rel, agg_path->total_cost,
+										   pathkeys))
+				return false;
+		}
+
+		if (!partial)
+			add_path(rel, (Path *) agg_path);
+		else
+			add_partial_path(rel, (Path *) agg_path);
+
+		return true;
+	}
+
+	return false;
 }
 
 /*
@@ -866,7 +1123,7 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  */
 static void
 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
-					Index rti, RangeTblEntry *rte)
+					Index rti, RangeTblEntry *rte, RelAggKind agg_kind)
 {
 	int			parentRTindex = rti;
 	bool		has_live_children;
@@ -877,6 +1134,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 	ListCell   *l;
 	Relids		live_children = NULL;
 	bool		did_pruning = false;
+	bool		grouped = rel->agg_info != NULL;
 
 	/* Guard against stack overflow due to overly deep inheritance tree. */
 	check_stack_depth();
@@ -1016,10 +1274,46 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		 * PlaceHolderVars.)  XXX we do not bother to update the cost or width
 		 * fields of childrel->reltarget; not clear if that would be useful.
 		 */
-		childrel->reltarget->exprs = (List *)
-			adjust_appendrel_attrs(root,
-								   (Node *) rel->reltarget->exprs,
-								   1, &appinfo);
+		if (grouped)
+		{
+			RelOptInfo *childrel_grouped;
+
+			Assert(childrel->grouped != NULL);
+
+			if (agg_kind == REL_AGG_KIND_PARTIAL)
+				childrel_grouped = childrel->grouped->needs_final_agg;
+			else if (agg_kind == REL_AGG_KIND_SIMPLE)
+				childrel_grouped = childrel->grouped->no_final_agg;
+			else
+				Assert(false);
+
+			/*
+			 * Special attention is needed in the grouped case.
+			 *
+			 * copy_simple_rel() didn't create empty target because it's
+			 * better to start with copying one from the parent rel.
+			 */
+			Assert(childrel_grouped->reltarget == NULL &&
+				   childrel_grouped->agg_info == NULL);
+			Assert(rel->reltarget != NULL && rel->agg_info != NULL);
+
+			/*
+			 * Translate the targets and grouping expressions so they match
+			 * this child.
+			 */
+			childrel_grouped->agg_info = translate_rel_agg_info(root, rel->agg_info,
+																&appinfo, 1);
+
+			/*
+			 * The relation paths will generate input for partial aggregation.
+			 */
+			childrel_grouped->reltarget = childrel_grouped->agg_info->input;
+		}
+		else
+			childrel->reltarget->exprs = (List *)
+				adjust_appendrel_attrs(root,
+									   (Node *) rel->reltarget->exprs,
+									   1, &appinfo);
 
 		/*
 		 * We have to make child entries in the EquivalenceClass data
@@ -1181,19 +1475,42 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 								   1, &appinfo);
 
 		/*
+		 * We have to make child entries in the EquivalenceClass data
+		 * structures as well.  This is needed either if the parent
+		 * participates in some eclass joins (because we will want to consider
+		 * inner-indexscan joins on the individual children) or if the parent
+		 * has useful pathkeys (because we should try to build MergeAppend
+		 * paths that produce those sort orderings).
+		 */
+		if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
+			add_child_rel_equivalences(root, appinfo, rel, childrel);
+		childrel->has_eclass_joins = rel->has_eclass_joins;
+
+		/*
+		 * Note: we could compute appropriate attr_needed data for the child's
+		 * variables, by transforming the parent's attr_needed through the
+		 * translated_vars mapping.  However, currently there's no need
+		 * because attr_needed is only examined for base relations not
+		 * otherrels.  So we just leave the child's attr_needed empty.
+		 */
+
+		/*
 		 * If parallelism is allowable for this query in general, see whether
 		 * it's allowable for this childrel in particular.  But if we've
 		 * already decided the appendrel is not parallel-safe as a whole,
 		 * there's no point in considering parallelism for this child.  For
 		 * consistency, do this before calling set_rel_size() for the child.
+		 *
+		 * The aggregated relations do not use the consider_parallel flag.
 		 */
-		if (root->glob->parallelModeOK && rel->consider_parallel)
+		if (root->glob->parallelModeOK && rel->consider_parallel &&
+			agg_kind == REL_AGG_KIND_NONE)
 			set_rel_consider_parallel(root, childrel, childRTE);
 
 		/*
 		 * Compute the child's size.
 		 */
-		set_rel_size(root, childrel, childRTindex, childRTE);
+		set_rel_size(root, childrel, childRTindex, childRTE, agg_kind);
 
 		/*
 		 * It is possible that constraint exclusion detected a contradiction
@@ -1299,13 +1616,20 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
  */
 static void
 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
-						Index rti, RangeTblEntry *rte)
+						Index rti, RangeTblEntry *rte, RelAggKind agg_kind)
 {
 	int			parentRTindex = rti;
 	List	   *live_childrels = NIL;
 	ListCell   *l;
 
 	/*
+	 * TODO Only allow per-child AGGSPLIT_SIMPLE if the partitioning allows
+	 * it, i.e. each partition generates distinct set of grouping keys.
+	 */
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+		return;
+
+	/*
 	 * Generate access paths for each member relation, and remember the
 	 * non-dummy children.
 	 */
@@ -1323,7 +1647,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		/* Re-locate the child RTE and RelOptInfo */
 		childRTindex = appinfo->child_relid;
 		childRTE = root->simple_rte_array[childRTindex];
-		childrel = root->simple_rel_array[childRTindex];
+		childrel = find_base_rel(root, childRTindex);
 
 		/*
 		 * If set_append_rel_size() decided the parent appendrel was
@@ -1337,7 +1661,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		/*
 		 * Compute the child's access paths.
 		 */
-		set_rel_pathlist(root, childrel, childRTindex, childRTE);
+		set_rel_pathlist(root, childrel, childRTindex, childRTE, agg_kind);
 
 		/*
 		 * If child is dummy, ignore it.
@@ -1353,12 +1677,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 
 		/*
 		 * Child is live, so add it to the live_childrels list for use below.
+		 *
+		 * If we added the paths to the grouped child rel, add that grouped
+		 * rel to the list instead.
 		 */
+		if (agg_kind == REL_AGG_KIND_PARTIAL)
+			childrel = childrel->grouped->needs_final_agg;
 		live_childrels = lappend(live_childrels, childrel);
 	}
 
-	/* Add paths to the append relation. */
-	add_paths_to_append_rel(root, rel, live_childrels);
+	add_paths_to_append_rel(root, rel, live_childrels, agg_kind);
 }
 
 
@@ -1375,7 +1703,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
  */
 void
 add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
-						List *live_childrels)
+						List *live_childrels, RelAggKind agg_kind)
 {
 	List	   *subpaths = NIL;
 	bool		subpaths_valid = true;
@@ -1390,6 +1718,21 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	List	   *partitioned_rels = NIL;
 	bool		build_partitioned_rels = false;
 	double		partial_rows = -1;
+	RelOptInfo *rel_target;
+
+	/*
+	 * TODO Only allow per-child AGGSPLIT_SIMPLE if the partitioning allows
+	 * it, i.e. each partition generates distinct set of grouping keys.
+	 */
+	Assert(agg_kind != REL_AGG_KIND_SIMPLE);
+
+	/*
+	 * Determine on which rel add_path() should be called.
+	 */
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		rel_target = rel->grouped->needs_final_agg;
+	else
+		rel_target = rel;
 
 	/* If appropriate, consider parallel append */
 	pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
@@ -1609,9 +1952,10 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	 * if we have zero or one live subpath due to constraint exclusion.)
 	 */
 	if (subpaths_valid)
-		add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
-												  NULL, 0, false,
-												  partitioned_rels, -1));
+		add_path(rel_target, (Path *) create_append_path(root, rel, subpaths, NIL,
+														 NULL, 0, false,
+														 partitioned_rels, -1,
+														 agg_kind));
 
 	/*
 	 * Consider an append of unordered, unparameterized partial paths.  Make
@@ -1654,7 +1998,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		appendpath = create_append_path(root, rel, NIL, partial_subpaths,
 										NULL, parallel_workers,
 										enable_parallel_append,
-										partitioned_rels, -1);
+										partitioned_rels, -1, agg_kind);
 
 		/*
 		 * Make sure any subsequent partial paths use the same row count
@@ -1703,7 +2047,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
 										pa_partial_subpaths,
 										NULL, parallel_workers, true,
-										partitioned_rels, partial_rows);
+										partitioned_rels, partial_rows,
+										agg_kind);
 		add_partial_path(rel, (Path *) appendpath);
 	}
 
@@ -1742,6 +2087,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
 			Path	   *subpath;
 
+			if (agg_kind == REL_AGG_KIND_PARTIAL)
+				childrel = childrel->grouped->needs_final_agg;
+			else if (agg_kind == REL_AGG_KIND_SIMPLE)
+				childrel = childrel->grouped->no_final_agg;
+
 			if (childrel->pathlist == NIL)
 			{
 				/* failed to make a suitable path for this child */
@@ -1751,7 +2101,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 			subpath = get_cheapest_parameterized_child_path(root,
 															childrel,
-															required_outer);
+															required_outer,
+															agg_kind);
 			if (subpath == NULL)
 			{
 				/* failed to make a suitable path for this child */
@@ -1762,10 +2113,10 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		}
 
 		if (subpaths_valid)
-			add_path(rel, (Path *)
+			add_path(rel_target, (Path *)
 					 create_append_path(root, rel, subpaths, NIL,
 										required_outer, 0, false,
-										partitioned_rels, -1));
+										partitioned_rels, -1, agg_kind));
 	}
 }
 
@@ -1799,6 +2150,7 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 						   List *partitioned_rels)
 {
 	ListCell   *lcp;
+	PathTarget *target = NULL;
 
 	foreach(lcp, all_child_pathkeys)
 	{
@@ -1807,23 +2159,25 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 		List	   *total_subpaths = NIL;
 		bool		startup_neq_total = false;
 		ListCell   *lcr;
+		Path	   *path;
 
 		/* Select the child paths for this ordering... */
 		foreach(lcr, live_childrels)
 		{
 			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+			List	   *pathlist = childrel->pathlist;
 			Path	   *cheapest_startup,
 					   *cheapest_total;
 
 			/* Locate the right paths, if they are available. */
 			cheapest_startup =
-				get_cheapest_path_for_pathkeys(childrel->pathlist,
+				get_cheapest_path_for_pathkeys(pathlist,
 											   pathkeys,
 											   NULL,
 											   STARTUP_COST,
 											   false);
 			cheapest_total =
-				get_cheapest_path_for_pathkeys(childrel->pathlist,
+				get_cheapest_path_for_pathkeys(pathlist,
 											   pathkeys,
 											   NULL,
 											   TOTAL_COST,
@@ -1856,19 +2210,28 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 		}
 
 		/* ... and build the MergeAppend paths */
-		add_path(rel, (Path *) create_merge_append_path(root,
-														rel,
-														startup_subpaths,
-														pathkeys,
-														NULL,
-														partitioned_rels));
+		path = (Path *) create_merge_append_path(root,
+												 rel,
+												 target,
+												 startup_subpaths,
+												 pathkeys,
+												 NULL,
+												 partitioned_rels);
+
+		add_path(rel, path);
+
 		if (startup_neq_total)
-			add_path(rel, (Path *) create_merge_append_path(root,
-															rel,
-															total_subpaths,
-															pathkeys,
-															NULL,
-															partitioned_rels));
+		{
+			path = (Path *) create_merge_append_path(root,
+													 rel,
+													 target,
+													 total_subpaths,
+													 pathkeys,
+													 NULL,
+													 partitioned_rels);
+			add_path(rel, path);
+		}
+
 	}
 }
 
@@ -1881,7 +2244,8 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
  */
 static Path *
 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
-									  Relids required_outer)
+									  Relids required_outer,
+									  RelAggKind agg_kind)
 {
 	Path	   *cheapest;
 	ListCell   *lc;
@@ -1928,7 +2292,8 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
 		/* Reparameterize if needed, then recheck cost */
 		if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
 		{
-			path = reparameterize_path(root, path, required_outer, 1.0);
+			path = reparameterize_path(root, path, required_outer, 1.0,
+									   agg_kind);
 			if (path == NULL)
 				continue;		/* failed to reparameterize this one */
 			Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
@@ -2030,7 +2395,8 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
 	rel->partial_pathlist = NIL;
 
 	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
-											  0, false, NIL, -1));
+											  0, false, NIL, -1,
+											  REL_AGG_KIND_NONE));
 
 	/*
 	 * We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
@@ -2670,11 +3036,22 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
 		root->initial_rels = initial_rels;
 
 		if (join_search_hook)
-			return (*join_search_hook) (root, levels_needed, initial_rels);
+			return (*join_search_hook) (root, levels_needed,
+										initial_rels);
 		else if (enable_geqo && levels_needed >= geqo_threshold)
+		{
+			/*
+			 * TODO Teach GEQO about grouped relations. Don't forget that
+			 * pathlist can be NIL before set_cheapest() gets called.
+			 *
+			 * This processing makes no difference between plain and grouped
+			 * rels, so process them in the same loop.
+			 */
 			return geqo(root, levels_needed, initial_rels);
+		}
 		else
-			return standard_join_search(root, levels_needed, initial_rels);
+			return standard_join_search(root, levels_needed,
+										initial_rels);
 	}
 }
 
@@ -2759,7 +3136,15 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 			rel = (RelOptInfo *) lfirst(lc);
 
 			/* Create paths for partitionwise joins. */
-			generate_partitionwise_join_paths(root, rel);
+			generate_partitionwise_join_paths(root, rel, REL_AGG_KIND_NONE);
+			if (rel->grouped)
+			{
+				generate_partitionwise_join_paths(root, rel,
+												  REL_AGG_KIND_PARTIAL);
+
+				generate_partitionwise_join_paths(root, rel,
+												  REL_AGG_KIND_SIMPLE);
+			}
 
 			/*
 			 * Except for the topmost scan/join rel, consider gathering
@@ -2771,6 +3156,12 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(rel);
+			if (rel->grouped)
+			{
+				set_cheapest(rel->grouped->needs_final_agg);
+				if (rel->grouped->no_final_agg)
+					set_cheapest(rel->grouped->no_final_agg);
+			}
 
 #ifdef OPTIMIZER_DEBUG
 			debug_print_rel(root, rel);
@@ -3409,6 +3800,7 @@ create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
 {
 	int			parallel_workers;
 	double		pages_fetched;
+	Path	   *bmhpath;
 
 	/* Compute heap pages for bitmap heap scan */
 	pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
@@ -3420,8 +3812,21 @@ create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
 	if (parallel_workers <= 0)
 		return;
 
-	add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
-														   bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
+	bmhpath = (Path *) create_bitmap_heap_path(root, rel, bitmapqual,
+											   rel->lateral_relids, 1.0,
+											   parallel_workers);
+
+	if (rel->agg_info == NULL)
+		add_partial_path(rel, bmhpath);
+	else
+	{
+		/*
+		 * Only AGG_HASHED is suitable here as it does not expect the input
+		 * set to be sorted.
+		 */
+		create_grouped_path(root, rel, (Path *) bmhpath, false, true,
+							AGG_HASHED, REL_AGG_KIND_PARTIAL);
+	}
 }
 
 /*
@@ -3528,13 +3933,21 @@ compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
  * generated here has a reference.
  */
 void
-generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
+generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel,
+								  RelAggKind agg_kind)
 {
 	List	   *live_children = NIL;
 	int			cnt_parts;
 	int			num_parts;
 	RelOptInfo **part_rels;
 
+	/*
+	 * TODO Only allow per-child AGGSPLIT_SIMPLE if the partitioning allows
+	 * it, i.e. each partition generates distinct set of grouping keys.
+	 */
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+		return;
+
 	/* Handle only join relations here. */
 	if (!IS_JOIN_REL(rel))
 		return;
@@ -3557,12 +3970,17 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
 		Assert(child_rel != NULL);
 
 		/* Add partitionwise join paths for partitioned child-joins. */
-		generate_partitionwise_join_paths(root, child_rel);
+		generate_partitionwise_join_paths(root, child_rel, agg_kind);
 
 		/* Dummy children will not be scanned, so ignore those. */
 		if (IS_DUMMY_REL(child_rel))
 			continue;
 
+		if (agg_kind == REL_AGG_KIND_PARTIAL)
+			child_rel = child_rel->grouped->needs_final_agg;
+		else if (agg_kind == REL_AGG_KIND_SIMPLE)
+			child_rel = child_rel->grouped->no_final_agg;
+
 		set_cheapest(child_rel);
 
 #ifdef OPTIMIZER_DEBUG
@@ -3575,12 +3993,17 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
 	/* If all child-joins are dummy, parent join is also dummy. */
 	if (!live_children)
 	{
+		if (agg_kind == REL_AGG_KIND_PARTIAL)
+			rel = rel->grouped->needs_final_agg;
+		else if (agg_kind == REL_AGG_KIND_SIMPLE)
+			rel = rel->grouped->no_final_agg;
+
 		mark_dummy_rel(rel);
 		return;
 	}
 
 	/* Build additional paths for this rel from child-join paths. */
-	add_paths_to_append_rel(root, rel, live_children);
+	add_paths_to_append_rel(root, rel, live_children, agg_kind);
 	list_free(live_children);
 }
 
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a2a7e0c520..f87a2d52ed 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -91,6 +91,7 @@
 #include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/var.h"
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
@@ -1068,6 +1069,17 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
 		*cost = path->total_cost;
 		*selec = ((BitmapOrPath *) path)->bitmapselectivity;
 	}
+	else if (IsA(path, AggPath))
+	{
+		/*
+		 * If partial aggregation was already applied, use only the input
+		 * path.
+		 *
+		 * TODO Take the aggregation into account, both cost and its effect on
+		 * selectivity (i.e. how it reduces the number of rows).
+		 */
+		cost_bitmap_tree_node(((AggPath *) path)->subpath, cost, selec);
+	}
 	else
 	{
 		elog(ERROR, "unrecognized node type: %d", nodeTag(path));
@@ -2290,6 +2302,41 @@ cost_group(Path *path, PlannerInfo *root,
 	path->total_cost = total_cost;
 }
 
+/*
+ * estimate_join_rows
+ *		Set the row estimate of a join path.  If the output is grouped
+ *		(agg_info != NULL), estimate the number of groups instead.
+ */
+static void
+estimate_join_rows(PlannerInfo *root, Path *path, RelAggInfo *agg_info)
+{
+	if (path->param_info == NULL)
+	{
+		/*
+		 * XXX agg_info->rows is an estimate of the output rows if we join
+		 * the non-grouped rels and aggregate the output. However the
+		 * figure can be different if an already grouped rel is joined to
+		 * non-grouped one. Is this worth adding a new field to the
+		 * agg_info?
+		 */
+		path->rows = agg_info != NULL ? agg_info->rows : path->parent->rows;
+		return;
+	}
+
+	/* Start from the row estimate of the parameterized path. */
+	path->rows = path->param_info->ppi_rows;
+
+	/* Grouping reduces the output to the number of distinct groups. */
+	if (agg_info != NULL)
+	{
+		double		ngroups;
+
+		ngroups = estimate_num_groups(root, agg_info->group_exprs,
+									  path->rows, NULL);
+		path->rows = clamp_row_est(ngroups);
+	}
+}
+
 /*
  * initial_cost_nestloop
  *	  Preliminary estimate of the cost of a nestloop join path.
@@ -2411,10 +2458,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 		inner_path_rows = 1;
 
 	/* Mark the path with the correct row estimate */
-	if (path->path.param_info)
-		path->path.rows = path->path.param_info->ppi_rows;
-	else
-		path->path.rows = path->path.parent->rows;
+	estimate_join_rows(root, (Path *) path, path->path.parent->agg_info);
 
 	/* For partial paths, scale row estimate. */
 	if (path->path.parallel_workers > 0)
@@ -2857,10 +2901,8 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 		inner_path_rows = 1;
 
 	/* Mark the path with the correct row estimate */
-	if (path->jpath.path.param_info)
-		path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
-	else
-		path->jpath.path.rows = path->jpath.path.parent->rows;
+	estimate_join_rows(root, (Path *) path,
+					   path->jpath.path.parent->agg_info);
 
 	/* For partial paths, scale row estimate. */
 	if (path->jpath.path.parallel_workers > 0)
@@ -3282,10 +3324,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	ListCell   *hcl;
 
 	/* Mark the path with the correct row estimate */
-	if (path->jpath.path.param_info)
-		path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
-	else
-		path->jpath.path.rows = path->jpath.path.parent->rows;
+	estimate_join_rows(root, (Path *) path,
+					   path->jpath.path.parent->agg_info);
 
 	/* For partial paths, scale row estimate. */
 	if (path->jpath.path.parallel_workers > 0)
@@ -3808,8 +3848,9 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	 * estimated execution cost given by pg_proc.procost (remember to multiply
 	 * this by cpu_operator_cost).
 	 *
-	 * Vars and Consts are charged zero, and so are boolean operators (AND,
-	 * OR, NOT). Simplistic, but a lot better than no model at all.
+	 * Vars, GroupedVars and Consts are charged zero, and so are boolean
+	 * operators (AND, OR, NOT). Simplistic, but a lot better than no model at
+	 * all.
 	 *
 	 * Should we try to account for the possibility of short-circuit
 	 * evaluation of AND/OR?  Probably *not*, because that would make the
@@ -4290,11 +4331,13 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
  *		  restriction clauses).
  *	width: the estimated average output tuple width in bytes.
  *	baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
+ *	grouped: will partial aggregation be applied to each path?
  */
 void
 set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 {
 	double		nrows;
+	bool		grouped = rel->agg_info != NULL;
 
 	/* Should only be applied to base relations */
 	Assert(rel->relid > 0);
@@ -4305,12 +4348,31 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 							   0,
 							   JOIN_INNER,
 							   NULL);
-
 	rel->rows = clamp_row_est(nrows);
 
+	/*
+	 * Grouping essentially changes the number of rows.
+	 */
+	if (grouped)
+	{
+		nrows = estimate_num_groups(root,
+									rel->agg_info->group_exprs, nrows,
+									NULL);
+		rel->agg_info->rows = clamp_row_est(nrows);
+	}
+
 	cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
 
-	set_rel_width(root, rel);
+	/*
+	 * The grouped target should have the cost and width set immediately on
+	 * creation, see create_rel_agg_info().
+	 */
+	if (!grouped)
+		set_rel_width(root, rel);
+#ifdef USE_ASSERT_CHECKING
+	else
+		Assert(rel->reltarget->width > 0);
+#endif
 }
 
 /*
@@ -4378,12 +4440,23 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 						   SpecialJoinInfo *sjinfo,
 						   List *restrictlist)
 {
+	double		outer_rows,
+				inner_rows;
+
+	/*
+	 * Take grouping of the input rels into account.
+	 */
+	outer_rows = outer_rel->agg_info ? outer_rel->agg_info->rows :
+		outer_rel->rows;
+	inner_rows = inner_rel->agg_info ? inner_rel->agg_info->rows :
+		inner_rel->rows;
+
 	rel->rows = calc_joinrel_size_estimate(root,
 										   rel,
 										   outer_rel,
 										   inner_rel,
-										   outer_rel->rows,
-										   inner_rel->rows,
+										   outer_rows,
+										   inner_rows,
 										   sjinfo,
 										   restrictlist);
 }
@@ -5260,11 +5333,11 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
 	foreach(lc, target->exprs)
 	{
 		Node	   *node = (Node *) lfirst(lc);
+		int32		item_width;
 
 		if (IsA(node, Var))
 		{
 			Var		   *var = (Var *) node;
-			int32		item_width;
 
 			/* We should not see any upper-level Vars here */
 			Assert(var->varlevelsup == 0);
@@ -5295,6 +5368,33 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
 			Assert(item_width > 0);
 			tuple_width += item_width;
 		}
+		else if (IsA(node, GroupedVar))
+		{
+			GroupedVar *gvar = (GroupedVar *) node;
+			Node	   *expr;
+
+			/*
+			 * Only AggPath can evaluate GroupedVar if it's an aggregate, or
+			 * the AggPath's input path if it's a generic grouping expression.
+			 * In the other cases the GroupedVar we see here only bubbled up
+			 * from a lower AggPath, so it does not add any cost to the path
+			 * that owns this target.
+			 *
+			 * XXX Is the value worth caching in GroupedVar?
+			 */
+			if (gvar->agg_partial != NULL)
+			{
+				Assert(IsA(gvar->gvexpr, Aggref));
+
+				expr = (Node *) gvar->agg_partial;
+			}
+			else
+				expr = (Node *) gvar->gvexpr;
+
+			item_width = get_typavgwidth(exprType(expr), exprTypmod(expr));
+			Assert(item_width > 0);
+			tuple_width += item_width;
+		}
 		else
 		{
 			/*
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..921e6f405b 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -65,6 +65,19 @@ static bool reconsider_outer_join_clause(PlannerInfo *root,
 static bool reconsider_full_join_clause(PlannerInfo *root,
 							RestrictInfo *rinfo);
 
+typedef struct translate_expr_context
+{
+	Var		  **keys;			/* Dictionary keys. */
+	Var		  **values;			/* Dictionary values */
+	int			nitems;			/* Number of dictionary items. */
+	Relids	   *gv_eval_at_p;	/* See GroupedVarInfo. */
+	Index		relid;			/* Translate into this relid. */
+} translate_expr_context;
+
+static Node *translate_expression_to_rels_mutator(Node *node,
+									 translate_expr_context *context);
+static int	var_dictionary_comparator(const void *a, const void *b);
+
 
 /*
  * process_equivalence
@@ -2511,3 +2524,329 @@ is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist)
 
 	return false;
 }
+
+/*
+ * translate_expression_to_rels
+ *		If the appropriate equivalence classes exist, replace vars in
+ *		gvi->gvexpr with vars whose varno is equal to relid.
+ */
+GroupedVarInfo *
+translate_expression_to_rels(PlannerInfo *root, GroupedVarInfo *gvi,
+							 Index relid)
+{
+	List	   *vars;
+	ListCell   *l1;
+	int			i,
+				j;
+	int			nkeys,
+				nkeys_resolved;
+	Var		  **keys,
+			  **values,
+			  **keys_tmp;
+	Var		   *key,
+			   *key_prev;
+	translate_expr_context context;
+	GroupedVarInfo *result;
+
+	/* Can't do anything w/o equivalence classes. */
+	if (root->eq_classes == NIL)
+		return NULL;
+
+	/*
+	 * Before actually trying to modify the expression tree, find out if all
+	 * vars can be translated.
+	 */
+	vars = pull_var_clause((Node *) gvi->gvexpr, PVC_RECURSE_AGGREGATES);
+
+	/* No vars to translate? */
+	if (vars == NIL)
+		return NULL;
+
+	/*
+	 * Search for individual replacement vars as well as the actual expression
+	 * translation will be more efficient if we use a dictionary with the keys
+	 * (i.e. the "source vars") unique and sorted.
+	 */
+	nkeys = list_length(vars);
+	keys = (Var **) palloc(nkeys * sizeof(Var *));
+	i = 0;
+	foreach(l1, vars)
+	{
+		key = lfirst_node(Var, l1);
+		keys[i++] = key;
+	}
+
+	/*
+	 * Sort the keys by varno. varattno decides where varnos are equal.
+	 */
+	if (nkeys > 1)
+		pg_qsort(keys, nkeys, sizeof(Var *), var_dictionary_comparator);
+
+	/*
+	 * Keep one entry per (varno, varattno); drop vars already having relid.
+	 */
+	keys_tmp = (Var **) palloc(nkeys * sizeof(Var *));
+	key_prev = NULL;
+	j = 0;
+	for (i = 0; i < nkeys; i++)
+	{
+		key = keys[i];
+
+		if ((key_prev == NULL || key->varno != key_prev->varno ||
+			 key->varattno != key_prev->varattno) &&
+			key->varno != relid)
+			keys_tmp[j++] = key;
+
+		key_prev = key;
+	}
+	pfree(keys);
+	keys = keys_tmp;
+	nkeys = j;
+
+	/*
+	 * Is there actually nothing to be translated?
+	 */
+	if (nkeys == 0)
+	{
+		pfree(keys);
+		return NULL;
+	}
+
+	nkeys_resolved = 0;
+
+	/*
+	 * Find the replacement vars.
+	 */
+	values = (Var **) palloc0(nkeys * sizeof(Var *));
+	foreach(l1, root->eq_classes)
+	{
+		EquivalenceClass *ec = lfirst_node(EquivalenceClass, l1);
+		Relids		ec_var_relids;
+		Var		  **ec_vars;
+		int			ec_nvars;
+		ListCell   *l2;
+
+		/* TODO Re-check if any other EC kind should be ignored. */
+		if (ec->ec_has_volatile || ec->ec_below_outer_join || ec->ec_broken)
+			continue;
+
+		/* Single-element EC can hardly help in translations. */
+		if (list_length(ec->ec_members) == 1)
+			continue;
+
+		/*
+		 * Collect all vars of this EC and their varnos.
+		 *
+		 * ec->ec_relids does not help because we're only interested in a
+		 * subset of EC members.
+		 */
+		ec_vars = (Var **) palloc(list_length(ec->ec_members) * sizeof(Var *));
+		ec_nvars = 0;
+		ec_var_relids = NULL;
+		foreach(l2, ec->ec_members)
+		{
+			EquivalenceMember *em = lfirst_node(EquivalenceMember, l2);
+			Var		   *ec_var;
+
+			if (!IsA(em->em_expr, Var))
+				continue;
+
+			ec_var = castNode(Var, em->em_expr);
+			ec_vars[ec_nvars++] = ec_var;
+			ec_var_relids = bms_add_member(ec_var_relids, ec_var->varno);
+		}
+
+		/*
+		 * At least two vars are needed so that the EC is usable for
+		 * translation.
+		 */
+		if (ec_nvars <= 1)
+		{
+			pfree(ec_vars);
+			bms_free(ec_var_relids);
+			continue;
+		}
+
+		/*
+		 * Now check where this EC can help.
+		 */
+		for (i = 0; i < nkeys; i++)
+		{
+			Relids		ec_rest;
+			bool		relid_ok,
+						key_found;
+			Var		   *key = keys[i];
+			Var		   *value = values[i];
+
+			/* Skip this item if it's already resolved. */
+			if (value != NULL)
+				continue;
+
+			/*
+			 * Can't translate if the EC does not mention key->varno.
+			 */
+			if (!bms_is_member(key->varno, ec_var_relids))
+				continue;
+
+			/*
+			 * Besides key, at least one EC member must belong to the relation
+			 * we're translating our expression to.
+			 */
+			ec_rest = bms_copy(ec_var_relids);
+			ec_rest = bms_del_member(ec_rest, key->varno);
+			relid_ok = bms_is_member(relid, ec_rest);
+			bms_free(ec_rest);
+			if (!relid_ok)
+				continue;
+
+			/*
+			 * The preliminary checks passed, so try to find the exact vars.
+			 */
+			key_found = false;
+			for (j = 0; j < ec_nvars; j++)
+			{
+				Var		   *ec_var = ec_vars[j];
+
+				if (!key_found && key->varno == ec_var->varno &&
+					key->varattno == ec_var->varattno)
+					key_found = true;
+
+				/*
+				 * Is this Var useful for our dictionary, i.e. does it
+				 * belong to the target relation?
+				 *
+				 * XXX Shouldn't ec_var be copied?
+				 */
+				if (value == NULL && ec_var->varno == relid)
+					value = ec_var;
+
+				if (key_found && value != NULL)
+					break;
+			}
+
+			/*
+			 * The replacement Var must have the same data type, otherwise the
+			 * values are not guaranteed to be grouped in the same way as
+			 * values of the original Var.
+			 */
+			if (key_found && value != NULL &&
+				key->vartype == value->vartype)
+			{
+				values[i] = value;
+				nkeys_resolved++;
+
+				if (nkeys_resolved == nkeys)
+					break;
+			}
+		}
+
+		pfree(ec_vars);
+		bms_free(ec_var_relids);
+
+		/* Don't need to check the remaining ECs? */
+		if (nkeys_resolved == nkeys)
+			break;
+	}
+
+	/* Couldn't compose usable dictionary? */
+	if (nkeys_resolved < nkeys)
+	{
+		pfree(keys);
+		pfree(values);
+		return NULL;
+	}
+
+	result = makeNode(GroupedVarInfo);
+	memcpy(result, gvi, sizeof(GroupedVarInfo));
+
+	/*
+	 * translate_expression_to_rels_mutator updates gv_eval_at.
+	 */
+	result->gv_eval_at = bms_copy(result->gv_eval_at);
+
+	/* The dictionary is ready, so perform the translation. */
+	context.keys = keys;
+	context.values = values;
+	context.nitems = nkeys;
+	context.gv_eval_at_p = &result->gv_eval_at;
+	context.relid = relid;
+	result->gvexpr = (Expr *)
+		translate_expression_to_rels_mutator((Node *) gvi->gvexpr, &context);
+	result->derived = true;
+
+	pfree(keys);
+	pfree(values);
+	return result;
+}
+
+static Node *
+translate_expression_to_rels_mutator(Node *node,
+									 translate_expr_context *context)
+{
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, Var))
+	{
+		Var		   *var = castNode(Var, node);
+		Var		  **key_p;
+		Var		   *value;
+		int			index;
+
+		/*
+		 * Simply return the existing variable if it already belongs to
+		 * the relation we're adjusting the expression to.
+		 */
+		if (var->varno == context->relid)
+			return (Node *) var;
+
+		key_p = bsearch(&var, context->keys, context->nitems, sizeof(Var *),
+						var_dictionary_comparator);
+
+		/* We shouldn't have omitted any var from the dictionary. */
+		Assert(key_p != NULL);
+
+		index = key_p - context->keys;
+		Assert(index >= 0 && index < context->nitems);
+		value = context->values[index];
+
+		/* All values should be present in the dictionary. */
+		Assert(value != NULL);
+		/* Update gv_eval_at: var's varno is replaced by value's varno. */
+		*context->gv_eval_at_p =
+			bms_del_member(*context->gv_eval_at_p, var->varno);
+		*context->gv_eval_at_p = bms_add_member(*context->gv_eval_at_p,
+												value->varno);
+
+		return (Node *) value;
+	}
+
+	return expression_tree_mutator(node, translate_expression_to_rels_mutator,
+								   (void *) context);
+}
+
+/*
+ * var_dictionary_comparator
+ *		qsort()/bsearch() callback for the Var dictionaries built by
+ *		translate_expression_to_rels().
+ *
+ * The array elements are (Var *) pointers; they are ordered by varno
+ * first and by varattno second.
+ */
+static int
+var_dictionary_comparator(const void *a, const void *b)
+{
+	Var		   *var1 = castNode(Var, *(Var **) a);
+	Var		   *var2 = castNode(Var, *(Var **) b);
+
+	/* Primary ordering by varno. */
+	if (var1->varno != var2->varno)
+		return var1->varno < var2->varno ? -1 : 1;
+
+	/* Secondary ordering by varattno. */
+	if (var1->varattno != var2->varattno)
+		return var1->varattno < var2->varattno ? -1 : 1;
+
+	/* Equal (varno, varattno): consider the Vars equal. */
+	return 0;
+}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index f295558f76..43e638d53a 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -32,6 +32,7 @@
 #include "optimizer/predtest.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
@@ -76,13 +77,13 @@ typedef struct
 	int			indexcol;		/* index column we want to match to */
 } ec_member_matches_arg;
 
-
 static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
 							IndexOptInfo *index,
 							IndexClauseSet *rclauseset,
 							IndexClauseSet *jclauseset,
 							IndexClauseSet *eclauseset,
-							List **bitindexpaths);
+							List **bitindexpaths,
+							RelAggKind agg_kind);
 static void consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
 							   IndexOptInfo *index,
 							   IndexClauseSet *rclauseset,
@@ -91,7 +92,8 @@ static void consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
 							   List **bitindexpaths,
 							   List *indexjoinclauses,
 							   int considered_clauses,
-							   List **considered_relids);
+							   List **considered_relids,
+							   RelAggKind agg_kind);
 static void get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
 					 IndexOptInfo *index,
 					 IndexClauseSet *rclauseset,
@@ -99,23 +101,28 @@ static void get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
 					 IndexClauseSet *eclauseset,
 					 List **bitindexpaths,
 					 Relids relids,
-					 List **considered_relids);
+					 List **considered_relids,
+					 RelAggKind agg_kind);
 static bool eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids,
 					List *indexjoinclauses);
 static bool bms_equal_any(Relids relids, List *relids_list);
 static void get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 				IndexOptInfo *index, IndexClauseSet *clauses,
-				List **bitindexpaths);
+				List **bitindexpaths,
+				RelAggKind agg_kind);
 static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 				  IndexOptInfo *index, IndexClauseSet *clauses,
 				  bool useful_predicate,
 				  ScanTypeControl scantype,
 				  bool *skip_nonnative_saop,
-				  bool *skip_lower_saop);
+				  bool *skip_lower_saop,
+				  RelAggKind agg_kind);
 static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
-				   List *clauses, List *other_clauses);
+				   List *clauses, List *other_clauses,
+				   RelAggKind agg_kind);
 static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
-						 List *clauses, List *other_clauses);
+						 List *clauses, List *other_clauses,
+						 RelAggKind agg_kind);
 static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
 				  List *paths);
 static int	path_usage_comparator(const void *a, const void *b);
@@ -225,9 +232,11 @@ static Const *string_to_const(const char *str, Oid datatype);
  * index quals ... but for now, it doesn't seem worth troubling over.
  * In particular, comments below about "unparameterized" paths should be read
  * as meaning "unparameterized so far as the indexquals are concerned".
+ *
+ * If rel->agg_info is set, grouped paths are generated for the relation too.
  */
 void
-create_index_paths(PlannerInfo *root, RelOptInfo *rel)
+create_index_paths(PlannerInfo *root, RelOptInfo *rel, RelAggKind agg_kind)
 {
 	List	   *indexpaths;
 	List	   *bitindexpaths;
@@ -272,8 +281,8 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
 		 * non-parameterized paths.  Plain paths go directly to add_path(),
 		 * bitmap paths are added to bitindexpaths to be handled below.
 		 */
-		get_index_paths(root, rel, index, &rclauseset,
-						&bitindexpaths);
+		get_index_paths(root, rel, index, &rclauseset, &bitindexpaths,
+						agg_kind);
 
 		/*
 		 * Identify the join clauses that can match the index.  For the moment
@@ -302,15 +311,25 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
 										&rclauseset,
 										&jclauseset,
 										&eclauseset,
-										&bitjoinpaths);
+										&bitjoinpaths,
+										agg_kind);
 	}
 
+
+	/*
+	 * Bitmap paths are currently not aggregated: AggPath does not accept the
+	 * TID bitmap as input, and even if it did, it'd seem weird to aggregate
+	 * the individual paths and then AND them together.
+	 */
+	if (rel->agg_info != NULL)
+		return;
+
 	/*
 	 * Generate BitmapOrPaths for any suitable OR-clauses present in the
 	 * restriction list.  Add these to bitindexpaths.
 	 */
-	indexpaths = generate_bitmap_or_paths(root, rel,
-										  rel->baserestrictinfo, NIL);
+	indexpaths = generate_bitmap_or_paths(root, rel, rel->baserestrictinfo,
+										  NIL, agg_kind);
 	bitindexpaths = list_concat(bitindexpaths, indexpaths);
 
 	/*
@@ -318,7 +337,8 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * the joinclause list.  Add these to bitjoinpaths.
 	 */
 	indexpaths = generate_bitmap_or_paths(root, rel,
-										  joinorclauses, rel->baserestrictinfo);
+										  joinorclauses, rel->baserestrictinfo,
+										  agg_kind);
 	bitjoinpaths = list_concat(bitjoinpaths, indexpaths);
 
 	/*
@@ -439,7 +459,8 @@ consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
 							IndexClauseSet *rclauseset,
 							IndexClauseSet *jclauseset,
 							IndexClauseSet *eclauseset,
-							List **bitindexpaths)
+							List **bitindexpaths,
+							RelAggKind agg_kind)
 {
 	int			considered_clauses = 0;
 	List	   *considered_relids = NIL;
@@ -475,7 +496,8 @@ consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
 									   bitindexpaths,
 									   jclauseset->indexclauses[indexcol],
 									   considered_clauses,
-									   &considered_relids);
+									   &considered_relids,
+									   agg_kind);
 		/* Consider each applicable eclass join clause */
 		considered_clauses += list_length(eclauseset->indexclauses[indexcol]);
 		consider_index_join_outer_rels(root, rel, index,
@@ -483,7 +505,8 @@ consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
 									   bitindexpaths,
 									   eclauseset->indexclauses[indexcol],
 									   considered_clauses,
-									   &considered_relids);
+									   &considered_relids,
+									   agg_kind);
 	}
 }
 
@@ -508,7 +531,8 @@ consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
 							   List **bitindexpaths,
 							   List *indexjoinclauses,
 							   int considered_clauses,
-							   List **considered_relids)
+							   List **considered_relids,
+							   RelAggKind agg_kind)
 {
 	ListCell   *lc;
 
@@ -575,7 +599,8 @@ consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
 								 rclauseset, jclauseset, eclauseset,
 								 bitindexpaths,
 								 bms_union(clause_relids, oldrelids),
-								 considered_relids);
+								 considered_relids,
+								 agg_kind);
 		}
 
 		/* Also try this set of relids by itself */
@@ -583,7 +608,8 @@ consider_index_join_outer_rels(PlannerInfo *root, RelOptInfo *rel,
 							 rclauseset, jclauseset, eclauseset,
 							 bitindexpaths,
 							 clause_relids,
-							 considered_relids);
+							 considered_relids,
+							 agg_kind);
 	}
 }
 
@@ -608,7 +634,8 @@ get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
 					 IndexClauseSet *eclauseset,
 					 List **bitindexpaths,
 					 Relids relids,
-					 List **considered_relids)
+					 List **considered_relids,
+					 RelAggKind agg_kind)
 {
 	IndexClauseSet clauseset;
 	int			indexcol;
@@ -665,7 +692,8 @@ get_join_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	Assert(clauseset.nonempty);
 
 	/* Build index path(s) using the collected set of clauses */
-	get_index_paths(root, rel, index, &clauseset, bitindexpaths);
+	get_index_paths(root, rel, index, &clauseset, bitindexpaths,
+					agg_kind);
 
 	/*
 	 * Remember we considered paths for this set of relids.  We use lcons not
@@ -715,7 +743,6 @@ bms_equal_any(Relids relids, List *relids_list)
 	return false;
 }
 
-
 /*
  * get_index_paths
  *	  Given an index and a set of index clauses for it, construct IndexPaths.
@@ -734,7 +761,7 @@ bms_equal_any(Relids relids, List *relids_list)
 static void
 get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 				IndexOptInfo *index, IndexClauseSet *clauses,
-				List **bitindexpaths)
+				List **bitindexpaths, RelAggKind agg_kind)
 {
 	List	   *indexpaths;
 	bool		skip_nonnative_saop = false;
@@ -746,18 +773,26 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * clauses only if the index AM supports them natively, and skip any such
 	 * clauses for index columns after the first (so that we produce ordered
 	 * paths if possible).
+	 *
+	 * These index paths are likely to be ordered, which makes them good
+	 * candidates for AGG_SORTED partial aggregation. AGG_HASHED should be
+	 * applied to paths with no pathkeys.
 	 */
 	indexpaths = build_index_paths(root, rel,
 								   index, clauses,
 								   index->predOK,
 								   ST_ANYSCAN,
 								   &skip_nonnative_saop,
-								   &skip_lower_saop);
+								   &skip_lower_saop,
+								   agg_kind);
 
 	/*
 	 * If we skipped any lower-order ScalarArrayOpExprs on an index with an AM
 	 * that supports them, then try again including those clauses.  This will
 	 * produce paths with more selectivity but no ordering.
+	 *
+	 * As for the grouping paths, only AGG_HASHED is considered due to the
+	 * missing ordering.
 	 */
 	if (skip_lower_saop)
 	{
@@ -767,7 +802,8 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 												   index->predOK,
 												   ST_ANYSCAN,
 												   &skip_nonnative_saop,
-												   NULL));
+												   NULL,
+												   agg_kind));
 	}
 
 	/*
@@ -799,6 +835,9 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * If there were ScalarArrayOpExpr clauses that the index can't handle
 	 * natively, generate bitmap scan paths relying on executor-managed
 	 * ScalarArrayOpExpr.
+	 *
+	 * As for grouping, only AGG_HASHED is possible here. Again, because
+	 * there's no ordering.
 	 */
 	if (skip_nonnative_saop)
 	{
@@ -807,7 +846,8 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									   false,
 									   ST_BITMAPSCAN,
 									   NULL,
-									   NULL);
+									   NULL,
+									   agg_kind);
 		*bitindexpaths = list_concat(*bitindexpaths, indexpaths);
 	}
 }
@@ -845,13 +885,18 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
  * NULL, we do not ignore non-first ScalarArrayOpExpr clauses, but they will
  * result in considering the scan's output to be unordered.
  *
+ * If the relation is grouped (rel->agg_info is set), AGG_SORTED and / or
+ * AGG_HASHED partial aggregation is applied to the index paths (as long as
+ * each index path is appropriate) and the resulting grouped paths are
+ * returned in place of the plain ones.
+ *
  * 'rel' is the index's heap relation
  * 'index' is the index for which we want to generate paths
  * 'clauses' is the collection of indexable clauses (RestrictInfo nodes)
  * 'useful_predicate' indicates whether the index has a useful predicate
  * 'scantype' indicates whether we need plain or bitmap scan support
  * 'skip_nonnative_saop' indicates whether to accept SAOP if index AM doesn't
- * 'skip_lower_saop' indicates whether to accept non-first-column SAOP
+ * 'skip_lower_saop' indicates whether to accept non-first-column SAOP.
  */
 static List *
 build_index_paths(PlannerInfo *root, RelOptInfo *rel,
@@ -859,7 +904,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 				  bool useful_predicate,
 				  ScanTypeControl scantype,
 				  bool *skip_nonnative_saop,
-				  bool *skip_lower_saop)
+				  bool *skip_lower_saop,
+				  RelAggKind agg_kind)
 {
 	List	   *result = NIL;
 	IndexPath  *ipath;
@@ -876,6 +922,12 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	bool		index_is_ordered;
 	bool		index_only_scan;
 	int			indexcol;
+	bool		grouped;
+	bool		can_agg_sorted,
+				can_agg_hashed;
+	AggPath    *agg_path;
+
+	grouped = rel->agg_info != NULL;
 
 	/*
 	 * Check that index supports the desired scan type(s)
@@ -1029,7 +1081,12 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * in the current clauses, OR the index ordering is potentially useful for
 	 * later merging or final output ordering, OR the index has a useful
 	 * predicate, OR an index-only scan is possible.
+	 *
+	 * This is where grouped paths start to be considered.
 	 */
+	can_agg_sorted = true;
+	can_agg_hashed = true;
+
 	if (index_clauses != NIL || useful_pathkeys != NIL || useful_predicate ||
 		index_only_scan)
 	{
@@ -1046,7 +1103,65 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 								  outer_relids,
 								  loop_count,
 								  false);
-		result = lappend(result, ipath);
+
+		if (!grouped)
+			result = lappend(result, ipath);
+		else
+		{
+			/*
+			 * Try to create the grouped paths if caller is interested in
+			 * them.
+			 */
+			if (useful_pathkeys != NIL)
+			{
+				agg_path = create_agg_sorted_path(root,
+												  (Path *) ipath,
+												  true,
+												  ipath->path.rows,
+												  agg_kind);
+
+				if (agg_path != NULL)
+					result = lappend(result, agg_path);
+				else
+				{
+					/*
+					 * If ipath could not be used as a source for AGG_SORTED
+					 * partial aggregation, it probably does not have the
+					 * appropriate pathkeys. Avoid trying to apply AGG_SORTED
+					 * to the next index paths because those will have the
+					 * same pathkeys.
+					 */
+					can_agg_sorted = false;
+				}
+			}
+			else
+				can_agg_sorted = false;
+
+			/*
+			 * Hashed aggregation should not be parameterized: the cost of
+			 * repeatedly rebuilding the hashtable is probably not worth it.
+			 * XXX the test below (outer_relids != NULL) looks inverted ---
+			 */
+			if (outer_relids != NULL)
+			{
+				agg_path = create_agg_hashed_path(root,
+												  (Path *) ipath,
+												  ipath->path.rows,
+												  agg_kind);
+
+				if (agg_path != NULL)
+					result = lappend(result, agg_path);
+				else
+				{
+					/*
+					 * If ipath could not be used as a source for AGG_HASHED,
+					 * we should not expect any other path of the same index
+					 * to succeed. Avoid wasting the effort next time.
+					 */
+					can_agg_hashed = false;
+				}
+			}
+		}
 
 		/*
 		 * If appropriate, consider parallel index scan.  We don't allow
@@ -1075,7 +1190,48 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 			 * parallel workers, just free it.
 			 */
 			if (ipath->path.parallel_workers > 0)
-				add_partial_path(rel, (Path *) ipath);
+			{
+				if (!grouped)
+					add_partial_path(rel, (Path *) ipath);
+				else
+				{
+					if (useful_pathkeys != NIL && can_agg_sorted)
+					{
+						/*
+						 * No need to check the pathkeys again.
+						 */
+						agg_path = create_agg_sorted_path(root,
+														  (Path *) ipath,
+														  false,
+														  ipath->path.rows,
+														  agg_kind);
+
+						/*
+						 * If create_agg_sorted_path succeeded once, it should
+						 * always succeed.
+						 */
+						Assert(agg_path != NULL);
+
+						add_partial_path(rel, (Path *) agg_path);
+					}
+
+					if (can_agg_hashed && outer_relids == NULL)
+					{
+						agg_path = create_agg_hashed_path(root,
+														  (Path *) ipath,
+														  ipath->path.rows,
+														  agg_kind);
+
+						/*
+						 * If create_agg_hashed_path succeeded once, it should
+						 * always succeed.
+						 */
+						Assert(agg_path != NULL);
+
+						add_partial_path(rel, (Path *) agg_path);
+					}
+				}
+			}
 			else
 				pfree(ipath);
 		}
@@ -1103,7 +1259,33 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									  outer_relids,
 									  loop_count,
 									  false);
-			result = lappend(result, ipath);
+
+			if (!grouped)
+				result = lappend(result, ipath);
+			else
+			{
+				/*
+				 * As the input set ordering does not matter to AGG_HASHED,
+				 * only AGG_SORTED makes sense here. (The AGG_HASHED path we'd
+				 * create here should already exist.)
+				 *
+				 * The existing value of can_agg_sorted is not up-to-date for
+				 * the new pathkeys.
+				 */
+				can_agg_sorted = true;
+
+				/* pathkeys are new, so check them. */
+				agg_path = create_agg_sorted_path(root,
+												  (Path *) ipath,
+												  true,
+												  ipath->path.rows,
+												  agg_kind);
+
+				if (agg_path != NULL)
+					result = lappend(result, agg_path);
+				else
+					can_agg_sorted = false;
+			}
 
 			/* If appropriate, consider parallel index scan */
 			if (index->amcanparallel &&
@@ -1127,7 +1309,27 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 				 * using parallel workers, just free it.
 				 */
 				if (ipath->path.parallel_workers > 0)
-					add_partial_path(rel, (Path *) ipath);
+				{
+					if (!grouped)
+						add_partial_path(rel, (Path *) ipath);
+					else
+					{
+						if (can_agg_sorted)
+						{
+							/*
+							 * The non-partial path above should have been
+							 * created, so no need to check pathkeys.
+							 */
+							agg_path = create_agg_sorted_path(root,
+															  (Path *) ipath,
+															  false,
+															  ipath->path.rows,
+															  agg_kind);
+							Assert(agg_path != NULL);
+							add_partial_path(rel, (Path *) agg_path);
+						}
+					}
+				}
 				else
 					pfree(ipath);
 			}
@@ -1162,10 +1364,12 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
  * 'rel' is the relation for which we want to generate index paths
  * 'clauses' is the current list of clauses (RestrictInfo nodes)
  * 'other_clauses' is the list of additional upper-level clauses
+ * 'agg_kind' indicates whether and how grouped paths should be created.
  */
 static List *
 build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
-				   List *clauses, List *other_clauses)
+				   List *clauses, List *other_clauses,
+				   RelAggKind agg_kind)
 {
 	List	   *result = NIL;
 	List	   *all_clauses = NIL;	/* not computed till needed */
@@ -1235,14 +1439,16 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
 		match_clauses_to_index(index, other_clauses, &clauseset);
 
 		/*
-		 * Construct paths if possible.
+		 * Construct paths if possible. Forbid partial aggregation even if the
+		 * relation is grouped --- it'll be applied to the bitmap heap path.
 		 */
 		indexpaths = build_index_paths(root, rel,
 									   index, &clauseset,
 									   useful_predicate,
 									   ST_BITMAPSCAN,
 									   NULL,
-									   NULL);
+									   NULL,
+									   agg_kind);
 		result = list_concat(result, indexpaths);
 	}
 
@@ -1261,7 +1467,8 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
  */
 static List *
 generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
-						 List *clauses, List *other_clauses)
+						 List *clauses, List *other_clauses,
+						 RelAggKind agg_kind)
 {
 	List	   *result = NIL;
 	List	   *all_clauses;
@@ -1301,13 +1508,15 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 
 				indlist = build_paths_for_OR(root, rel,
 											 andargs,
-											 all_clauses);
+											 all_clauses,
+											 agg_kind);
 
 				/* Recurse in case there are sub-ORs */
 				indlist = list_concat(indlist,
 									  generate_bitmap_or_paths(root, rel,
 															   andargs,
-															   all_clauses));
+															   all_clauses,
+															   agg_kind));
 			}
 			else
 			{
@@ -1319,7 +1528,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 
 				indlist = build_paths_for_OR(root, rel,
 											 orargs,
-											 all_clauses);
+											 all_clauses,
+											 agg_kind);
 			}
 
 			/*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 642f951093..d5880e42a0 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -48,29 +48,38 @@ static void try_partial_mergejoin_path(PlannerInfo *root,
 						   List *outersortkeys,
 						   List *innersortkeys,
 						   JoinType jointype,
-						   JoinPathExtraData *extra);
+						   JoinPathExtraData *extra,
+						   RelAggKind agg_kind,
+						   bool do_aggregate);
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 JoinType jointype, JoinPathExtraData *extra);
+					 JoinType jointype, JoinPathExtraData *extra,
+					 RelAggKind agg_kind, bool do_aggregate);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 JoinType jointype, JoinPathExtraData *extra);
+					 JoinType jointype, JoinPathExtraData *extra,
+					 RelAggKind agg_kind, bool do_aggregate);
 static void consider_parallel_nestloop(PlannerInfo *root,
 						   RelOptInfo *joinrel,
 						   RelOptInfo *outerrel,
 						   RelOptInfo *innerrel,
 						   JoinType jointype,
-						   JoinPathExtraData *extra);
+						   JoinPathExtraData *extra,
+						   RelAggKind agg_kind,
+						   bool do_aggregate);
 static void consider_parallel_mergejoin(PlannerInfo *root,
 							RelOptInfo *joinrel,
 							RelOptInfo *outerrel,
 							RelOptInfo *innerrel,
 							JoinType jointype,
 							JoinPathExtraData *extra,
-							Path *inner_cheapest_total);
+							Path *inner_cheapest_total,
+							RelAggKind agg_kind,
+							bool do_aggregate);
 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 JoinType jointype, JoinPathExtraData *extra);
+					 JoinType jointype, JoinPathExtraData *extra,
+					 RelAggKind agg_kind, bool do_aggregate);
 static List *select_mergejoin_clauses(PlannerInfo *root,
 						 RelOptInfo *joinrel,
 						 RelOptInfo *outerrel,
@@ -87,7 +96,9 @@ static void generate_mergejoin_paths(PlannerInfo *root,
 						 bool useallclauses,
 						 Path *inner_cheapest_total,
 						 List *merge_pathkeys,
-						 bool is_partial);
+						 bool is_partial,
+						 RelAggKind agg_kind,
+						 bool do_aggregate);
 
 
 /*
@@ -112,6 +123,9 @@ static void generate_mergejoin_paths(PlannerInfo *root,
  * however.  Path cost estimation code may need to recognize that it's
  * dealing with such a case --- the combination of nominal jointype INNER
  * with sjinfo->jointype == JOIN_SEMI indicates that.
+ *
+ * 'do_aggregate' is true iff partial aggregation should be applied to the
+ * join path; 'agg_kind' selects the grouped relation the paths are added to.
  */
 void
 add_paths_to_joinrel(PlannerInfo *root,
@@ -120,7 +134,9 @@ add_paths_to_joinrel(PlannerInfo *root,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
 					 SpecialJoinInfo *sjinfo,
-					 List *restrictlist)
+					 List *restrictlist,
+					 RelAggKind agg_kind,
+					 bool do_aggregate)
 {
 	JoinPathExtraData extra;
 	bool		mergejoin_allowed = true;
@@ -267,7 +283,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (mergejoin_allowed)
 		sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-							 jointype, &extra);
+							 jointype, &extra, agg_kind, do_aggregate);
 
 	/*
 	 * 2. Consider paths where the outer relation need not be explicitly
@@ -278,7 +294,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (mergejoin_allowed)
 		match_unsorted_outer(root, joinrel, outerrel, innerrel,
-							 jointype, &extra);
+							 jointype, &extra, agg_kind, do_aggregate);
 
 #ifdef NOT_USED
 
@@ -305,7 +321,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (enable_hashjoin || jointype == JOIN_FULL)
 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
-							 jointype, &extra);
+							 jointype, &extra, agg_kind, do_aggregate);
 
 	/*
 	 * 5. If inner and outer relations are foreign tables (or joins) belonging
@@ -366,7 +382,9 @@ try_nestloop_path(PlannerInfo *root,
 				  Path *inner_path,
 				  List *pathkeys,
 				  JoinType jointype,
-				  JoinPathExtraData *extra)
+				  JoinPathExtraData *extra,
+				  RelAggKind agg_kind,
+				  bool do_aggregate)
 {
 	Relids		required_outer;
 	JoinCostWorkspace workspace;
@@ -376,6 +394,12 @@ try_nestloop_path(PlannerInfo *root,
 	Relids		outerrelids;
 	Relids		inner_paramrels = PATH_REQ_OUTER(inner_path);
 	Relids		outer_paramrels = PATH_REQ_OUTER(outer_path);
+	bool		success = false;
+
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		joinrel = joinrel->grouped->no_final_agg;
 
 	/*
 	 * Paths are parameterized by top-level parents, so run parameterization
@@ -422,10 +446,31 @@ try_nestloop_path(PlannerInfo *root,
 	initial_cost_nestloop(root, &workspace, jointype,
 						  outer_path, inner_path, extra);
 
-	if (add_path_precheck(joinrel,
-						  workspace.startup_cost, workspace.total_cost,
-						  pathkeys, required_outer))
+	/*
+	 * If the join output should be (partially) aggregated, the precheck
+	 * includes the aggregation and is postponed to create_grouped_path().
+	 */
+	if ((!do_aggregate &&
+		 add_path_precheck(joinrel,
+						   workspace.startup_cost, workspace.total_cost,
+						   pathkeys, required_outer)) ||
+		do_aggregate)
 	{
+		Path	   *path;
+		PathTarget *target;
+
+		/*
+		 * If the join output is subject to partial aggregation, the path must
+		 * have the appropriate target.
+		 */
+		if (!do_aggregate)
+			target = joinrel->reltarget;
+		else
+		{
+			Assert(joinrel->agg_info != NULL);
+			target = joinrel->agg_info->input;
+		}
+
 		/*
 		 * If the inner path is parameterized, it is parameterized by the
 		 * topmost parent of the outer rel, not the outer rel itself.  Fix
@@ -447,21 +492,58 @@ try_nestloop_path(PlannerInfo *root,
 			}
 		}
 
-		add_path(joinrel, (Path *)
-				 create_nestloop_path(root,
-									  joinrel,
-									  jointype,
-									  &workspace,
-									  extra,
-									  outer_path,
-									  inner_path,
-									  extra->restrictlist,
-									  pathkeys,
-									  required_outer));
+		path = (Path *) create_nestloop_path(root,
+											 joinrel,
+											 target,
+											 jointype,
+											 &workspace,
+											 extra,
+											 outer_path,
+											 inner_path,
+											 extra->restrictlist,
+											 pathkeys,
+											 required_outer);
+		if (!do_aggregate)
+		{
+			add_path(joinrel, path);
+			success = true;
+		}
+		else
+		{
+			/*
+			 * Try both AGG_HASHED and AGG_SORTED partial aggregation.
+			 *
+			 * AGG_HASHED should not be parameterized because we don't want to
+			 * create the hashtable again for each set of parameters.
+			 */
+			if (required_outer == NULL)
+				success = create_grouped_path(root,
+											  joinrel,
+											  path,
+											  true,
+											  false,
+											  AGG_HASHED,
+											  agg_kind);
+
+			/*
+			 * Don't try AGG_SORTED if create_grouped_path() would reject it
+			 * anyway.
+			 */
+			if (pathkeys != NIL)
+				success = success ||
+					create_grouped_path(root,
+										joinrel,
+										path,
+										true,
+										false,
+										AGG_SORTED,
+										agg_kind);
+		}
 	}
-	else
+
+	if (!success)
 	{
-		/* Waste no memory when we reject a path here */
+		/* Waste no memory when we reject path(s) here */
 		bms_free(required_outer);
 	}
 }
@@ -478,9 +560,28 @@ try_partial_nestloop_path(PlannerInfo *root,
 						  Path *inner_path,
 						  List *pathkeys,
 						  JoinType jointype,
-						  JoinPathExtraData *extra)
+						  JoinPathExtraData *extra,
+						  RelAggKind agg_kind,
+						  bool do_aggregate)
 {
 	JoinCostWorkspace workspace;
+	Path	   *path;
+	PathTarget *target;
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		/*
+		 * See create_grouped_path() for explanation why parallel grouping
+		 * paths are not useful w/o final aggregation.
+		 */
+		return;
+	}
+
+	/*
+	 * Fetch the relation to which we'll add the paths.
+	 */
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
 
 	/*
 	 * If the inner path is parameterized, the parameterization must be fully
@@ -515,7 +616,13 @@ try_partial_nestloop_path(PlannerInfo *root,
 	 */
 	initial_cost_nestloop(root, &workspace, jointype,
 						  outer_path, inner_path, extra);
-	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+
+	/*
+	 * If the join output should be (partially) aggregated, the precheck
+	 * includes the aggregation and is postponed to create_grouped_path().
+	 */
+	if (!do_aggregate &&
+		!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
 		return;
 
 	/*
@@ -534,18 +641,56 @@ try_partial_nestloop_path(PlannerInfo *root,
 			return;
 	}
 
+	/*
+	 * If the join output is subject to partial aggregation, the path must
+	 * have the appropriate target.
+	 */
+	if (!do_aggregate)
+		target = joinrel->reltarget;
+	else
+	{
+		Assert(joinrel->agg_info != NULL);
+		target = joinrel->agg_info->input;
+	}
+
 	/* Might be good enough to be worth trying, so let's try it. */
-	add_partial_path(joinrel, (Path *)
-					 create_nestloop_path(root,
-										  joinrel,
-										  jointype,
-										  &workspace,
-										  extra,
-										  outer_path,
-										  inner_path,
-										  extra->restrictlist,
-										  pathkeys,
-										  NULL));
+	path = (Path *) create_nestloop_path(root,
+										 joinrel,
+										 target,
+										 jointype,
+										 &workspace,
+										 extra,
+										 outer_path,
+										 inner_path,
+										 extra->restrictlist,
+										 pathkeys,
+										 NULL);
+
+	if (!do_aggregate)
+		add_partial_path(joinrel, path);
+	else
+	{
+		create_grouped_path(root,
+							joinrel,
+							path,
+							true,
+							true,
+							AGG_HASHED,
+							agg_kind);
+
+		/*
+		 * Don't try AGG_SORTED if create_grouped_path() would reject it
+		 * anyway.
+		 */
+		if (pathkeys != NIL)
+			create_grouped_path(root,
+								joinrel,
+								path,
+								true,
+								true,
+								AGG_SORTED,
+								agg_kind);
+	}
 }
 
 /*
@@ -564,15 +709,24 @@ try_mergejoin_path(PlannerInfo *root,
 				   List *innersortkeys,
 				   JoinType jointype,
 				   JoinPathExtraData *extra,
-				   bool is_partial)
+				   bool is_partial,
+				   RelAggKind agg_kind,
+				   bool do_aggregate)
 {
 	Relids		required_outer;
 	JoinCostWorkspace workspace;
+	bool		success = false;
+	RelOptInfo *joinrel_plain = joinrel;
+
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		joinrel = joinrel->grouped->no_final_agg;
 
 	if (is_partial)
 	{
 		try_partial_mergejoin_path(root,
-								   joinrel,
+								   joinrel_plain,
 								   outer_path,
 								   inner_path,
 								   pathkeys,
@@ -580,7 +734,9 @@ try_mergejoin_path(PlannerInfo *root,
 								   outersortkeys,
 								   innersortkeys,
 								   jointype,
-								   extra);
+								   extra,
+								   agg_kind,
+								   do_aggregate);
 		return;
 	}
 
@@ -617,26 +773,70 @@ try_mergejoin_path(PlannerInfo *root,
 						   outersortkeys, innersortkeys,
 						   extra);
 
-	if (add_path_precheck(joinrel,
-						  workspace.startup_cost, workspace.total_cost,
-						  pathkeys, required_outer))
+	if ((!do_aggregate &&
+		 add_path_precheck(joinrel,
+						   workspace.startup_cost, workspace.total_cost,
+						   pathkeys, required_outer)) ||
+		do_aggregate)
 	{
-		add_path(joinrel, (Path *)
-				 create_mergejoin_path(root,
-									   joinrel,
-									   jointype,
-									   &workspace,
-									   extra,
-									   outer_path,
-									   inner_path,
-									   extra->restrictlist,
-									   pathkeys,
-									   required_outer,
-									   mergeclauses,
-									   outersortkeys,
-									   innersortkeys));
+		Path	   *path;
+		PathTarget *target;
+
+		/*
+		 * If the join output is subject to partial aggregation, the path must
+		 * have the appropriate target.
+		 */
+		if (!do_aggregate)
+			target = joinrel->reltarget;
+		else
+		{
+			Assert(joinrel->agg_info != NULL);
+			target = joinrel->agg_info->input;
+		}
+
+		path = (Path *) create_mergejoin_path(root,
+											  joinrel,
+											  target,
+											  jointype,
+											  &workspace,
+											  extra,
+											  outer_path,
+											  inner_path,
+											  extra->restrictlist,
+											  pathkeys,
+											  required_outer,
+											  mergeclauses,
+											  outersortkeys,
+											  innersortkeys);
+		if (!do_aggregate)
+		{
+			add_path(joinrel, path);
+			success = true;
+		}
+		else
+		{
+			if (required_outer == NULL)
+				success = create_grouped_path(root,
+											  joinrel,
+											  path,
+											  true,
+											  false,
+											  AGG_HASHED,
+											  agg_kind);
+
+			if (pathkeys != NIL)
+				success = success ||
+					create_grouped_path(root,
+										joinrel,
+										path,
+										true,
+										false,
+										AGG_SORTED,
+										agg_kind);
+		}
 	}
-	else
+
+	if (!success)
 	{
 		/* Waste no memory when we reject a path here */
 		bms_free(required_outer);
@@ -658,9 +858,28 @@ try_partial_mergejoin_path(PlannerInfo *root,
 						   List *outersortkeys,
 						   List *innersortkeys,
 						   JoinType jointype,
-						   JoinPathExtraData *extra)
+						   JoinPathExtraData *extra,
+						   RelAggKind agg_kind,
+						   bool do_aggregate)
 {
 	JoinCostWorkspace workspace;
+	Path	   *path;
+	PathTarget *target;
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		/*
+		 * See create_grouped_path() for explanation why parallel grouping
+		 * paths are not useful w/o final aggregation.
+		 */
+		return;
+	}
+
+	/*
+	 * Fetch the relation to which we'll add the paths.
+	 */
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
 
 	/*
 	 * See comments in try_partial_hashjoin_path().
@@ -693,24 +912,59 @@ try_partial_mergejoin_path(PlannerInfo *root,
 						   outersortkeys, innersortkeys,
 						   extra);
 
-	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
+	if (!do_aggregate &&
+		!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
 		return;
 
+	/*
+	 * If the join output is subject to partial aggregation, the path must
+	 * have the appropriate target.
+	 */
+	if (!do_aggregate)
+		target = joinrel->reltarget;
+	else
+	{
+		Assert(joinrel->agg_info != NULL);
+		target = joinrel->agg_info->input;
+	}
+
 	/* Might be good enough to be worth trying, so let's try it. */
-	add_partial_path(joinrel, (Path *)
-					 create_mergejoin_path(root,
-										   joinrel,
-										   jointype,
-										   &workspace,
-										   extra,
-										   outer_path,
-										   inner_path,
-										   extra->restrictlist,
-										   pathkeys,
-										   NULL,
-										   mergeclauses,
-										   outersortkeys,
-										   innersortkeys));
+	path = (Path *) create_mergejoin_path(root,
+										  joinrel,
+										  target,
+										  jointype,
+										  &workspace,
+										  extra,
+										  outer_path,
+										  inner_path,
+										  extra->restrictlist,
+										  pathkeys,
+										  NULL,
+										  mergeclauses,
+										  outersortkeys,
+										  innersortkeys);
+
+	if (!do_aggregate)
+		add_partial_path(joinrel, path);
+	else
+	{
+		create_grouped_path(root,
+							joinrel,
+							path,
+							true,
+							true,
+							AGG_HASHED,
+							agg_kind);
+
+		if (pathkeys != NIL)
+			create_grouped_path(root,
+								joinrel,
+								path,
+								true,
+								true,
+								AGG_SORTED,
+								agg_kind);
+	}
 }
 
 /*
@@ -725,10 +979,19 @@ try_hashjoin_path(PlannerInfo *root,
 				  Path *inner_path,
 				  List *hashclauses,
 				  JoinType jointype,
-				  JoinPathExtraData *extra)
+				  JoinPathExtraData *extra,
+				  RelAggKind agg_kind,
+				  bool do_aggregate)
 {
 	Relids		required_outer;
 	JoinCostWorkspace workspace;
+	Path	   *path = NULL;
+	bool		success = false;
+
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		joinrel = joinrel->grouped->no_final_agg;
 
 	/*
 	 * Check to see if proposed path is still parameterized, and reject if the
@@ -745,30 +1008,81 @@ try_hashjoin_path(PlannerInfo *root,
 	}
 
 	/*
+	 * Parameterized execution of grouped path would mean repeated hashing of
+	 * the hashjoin output, so forget about AGG_HASHED if there
+	 * are any parameters. And AGG_SORTED makes no sense because the hash join
+	 * output is not sorted.
+	 */
+	if (required_outer && joinrel->agg_info)
+		return;
+
+	/*
 	 * See comments in try_nestloop_path().  Also note that hashjoin paths
 	 * never have any output pathkeys, per comments in create_hashjoin_path.
 	 */
 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
 						  outer_path, inner_path, extra, false);
 
-	if (add_path_precheck(joinrel,
-						  workspace.startup_cost, workspace.total_cost,
-						  NIL, required_outer))
+	/*
+	 * If the join output should be (partially) aggregated, the precheck
+	 * includes the aggregation and is postponed to create_grouped_path().
+	 */
+	if ((!do_aggregate &&
+		 add_path_precheck(joinrel,
+						   workspace.startup_cost, workspace.total_cost,
+						   NIL, required_outer)) ||
+		do_aggregate)
 	{
-		add_path(joinrel, (Path *)
-				 create_hashjoin_path(root,
-									  joinrel,
-									  jointype,
-									  &workspace,
-									  extra,
-									  outer_path,
-									  inner_path,
-									  false,	/* parallel_hash */
-									  extra->restrictlist,
-									  required_outer,
-									  hashclauses));
+		PathTarget *target;
+
+		/*
+		 * If the join output is subject to partial aggregation, the path must
+		 * have the appropriate target.
+		 */
+		if (!do_aggregate)
+			target = joinrel->reltarget;
+		else
+		{
+			Assert(joinrel->agg_info != NULL);
+			target = joinrel->agg_info->input;
+		}
+
+		path = (Path *) create_hashjoin_path(root,
+											 joinrel,
+											 target,
+											 jointype,
+											 &workspace,
+											 extra,
+											 outer_path,
+											 inner_path,
+											 false, /* parallel_hash */
+											 extra->restrictlist,
+											 required_outer,
+											 hashclauses);
+
+		if (!do_aggregate)
+		{
+			add_path(joinrel, path);
+			success = true;
+		}
+		else
+		{
+
+			/*
+			 * As the hashjoin path is not sorted, only try AGG_HASHED.
+			 */
+			if (create_grouped_path(root,
+									joinrel,
+									path,
+									true,
+									false,
+									AGG_HASHED,
+									agg_kind))
+				success = true;
+		}
 	}
-	else
+
+	if (!success)
 	{
 		/* Waste no memory when we reject a path here */
 		bms_free(required_outer);
@@ -792,9 +1106,28 @@ try_partial_hashjoin_path(PlannerInfo *root,
 						  List *hashclauses,
 						  JoinType jointype,
 						  JoinPathExtraData *extra,
-						  bool parallel_hash)
+						  bool parallel_hash,
+						  RelAggKind agg_kind,
+						  bool do_aggregate)
 {
 	JoinCostWorkspace workspace;
+	Path	   *path;
+	PathTarget *target;
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		/*
+		 * See create_grouped_path() for explanation why parallel grouping
+		 * paths are not useful w/o final aggregation.
+		 */
+		return;
+	}
+
+	/*
+	 * Fetch the relation to which we'll add the paths.
+	 */
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
 
 	/*
 	 * If the inner path is parameterized, the parameterization must be fully
@@ -816,23 +1149,55 @@ try_partial_hashjoin_path(PlannerInfo *root,
 	 * cost.  Bail out right away if it looks terrible.
 	 */
 	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
-						  outer_path, inner_path, extra, parallel_hash);
-	if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
+						  outer_path, inner_path, extra, true);
+
+	/*
+	 * If the join output should be (partially) aggregated, the precheck
+	 * includes the aggregation and is postponed to create_grouped_path().
+	 */
+	if (!do_aggregate &&
+		!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
 		return;
 
-	/* Might be good enough to be worth trying, so let's try it. */
-	add_partial_path(joinrel, (Path *)
-					 create_hashjoin_path(root,
-										  joinrel,
-										  jointype,
-										  &workspace,
-										  extra,
-										  outer_path,
-										  inner_path,
-										  parallel_hash,
-										  extra->restrictlist,
-										  NULL,
-										  hashclauses));
+	/*
+	 * If the join output is subject to partial aggregation, the path must
+	 * have the appropriate target.
+	 */
+	if (!do_aggregate)
+		target = joinrel->reltarget;
+	else
+	{
+		Assert(joinrel->agg_info != NULL);
+		target = joinrel->agg_info->input;
+	}
+
+	path = (Path *) create_hashjoin_path(root,
+										 joinrel,
+										 target,
+										 jointype,
+										 &workspace,
+										 extra,
+										 outer_path,
+										 inner_path,
+										 parallel_hash,
+										 extra->restrictlist,
+										 NULL,
+										 hashclauses);
+	if (!do_aggregate)
+		add_partial_path(joinrel, path);
+	else
+	{
+		/*
+		 * Only AGG_HASHED is useful, see comments in try_hashjoin_path().
+		 */
+		create_grouped_path(root,
+							joinrel,
+							path,
+							true,
+							true,
+							AGG_HASHED,
+							agg_kind);
+	}
 }
 
 /*
@@ -876,6 +1241,7 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
  * 'innerrel' is the inner join relation
  * 'jointype' is the type of join to do
  * 'extra' contains additional input values
+ * 'agg_kind' and 'do_aggregate' tell if/how to aggregate the join output.
  */
 static void
 sort_inner_and_outer(PlannerInfo *root,
@@ -883,7 +1249,9 @@ sort_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
-					 JoinPathExtraData *extra)
+					 JoinPathExtraData *extra,
+					 RelAggKind agg_kind,
+					 bool do_aggregate)
 {
 	JoinType	save_jointype = jointype;
 	Path	   *outer_path;
@@ -1045,7 +1413,9 @@ sort_inner_and_outer(PlannerInfo *root,
 						   innerkeys,
 						   jointype,
 						   extra,
-						   false);
+						   false,
+						   agg_kind,
+						   do_aggregate);
 
 		/*
 		 * If we have partial outer and parallel safe inner path then try
@@ -1061,7 +1431,9 @@ sort_inner_and_outer(PlannerInfo *root,
 									   outerkeys,
 									   innerkeys,
 									   jointype,
-									   extra);
+									   extra,
+									   agg_kind,
+									   do_aggregate);
 	}
 }
 
@@ -1089,7 +1461,9 @@ generate_mergejoin_paths(PlannerInfo *root,
 						 bool useallclauses,
 						 Path *inner_cheapest_total,
 						 List *merge_pathkeys,
-						 bool is_partial)
+						 bool is_partial,
+						 RelAggKind agg_kind,
+						 bool do_aggregate)
 {
 	List	   *mergeclauses;
 	List	   *innersortkeys;
@@ -1150,7 +1524,9 @@ generate_mergejoin_paths(PlannerInfo *root,
 					   innersortkeys,
 					   jointype,
 					   extra,
-					   is_partial);
+					   is_partial,
+					   agg_kind,
+					   do_aggregate);
 
 	/* Can't do anything else if inner path needs to be unique'd */
 	if (save_jointype == JOIN_UNIQUE_INNER)
@@ -1247,7 +1623,9 @@ generate_mergejoin_paths(PlannerInfo *root,
 							   NIL,
 							   jointype,
 							   extra,
-							   is_partial);
+							   is_partial,
+							   agg_kind,
+							   do_aggregate);
 			cheapest_total_inner = innerpath;
 		}
 		/* Same on the basis of cheapest startup cost ... */
@@ -1291,7 +1669,9 @@ generate_mergejoin_paths(PlannerInfo *root,
 								   NIL,
 								   jointype,
 								   extra,
-								   is_partial);
+								   is_partial,
+								   agg_kind,
+								   do_aggregate);
 			}
 			cheapest_startup_inner = innerpath;
 		}
@@ -1333,7 +1713,9 @@ match_unsorted_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
-					 JoinPathExtraData *extra)
+					 JoinPathExtraData *extra,
+					 RelAggKind agg_kind,
+					 bool do_aggregate)
 {
 	JoinType	save_jointype = jointype;
 	bool		nestjoinOK;
@@ -1456,7 +1838,9 @@ match_unsorted_outer(PlannerInfo *root,
 							  inner_cheapest_total,
 							  merge_pathkeys,
 							  jointype,
-							  extra);
+							  extra,
+							  agg_kind,
+							  do_aggregate);
 		}
 		else if (nestjoinOK)
 		{
@@ -1478,7 +1862,9 @@ match_unsorted_outer(PlannerInfo *root,
 								  innerpath,
 								  merge_pathkeys,
 								  jointype,
-								  extra);
+								  extra,
+								  agg_kind,
+								  do_aggregate);
 			}
 
 			/* Also consider materialized form of the cheapest inner path */
@@ -1489,7 +1875,9 @@ match_unsorted_outer(PlannerInfo *root,
 								  matpath,
 								  merge_pathkeys,
 								  jointype,
-								  extra);
+								  extra,
+								  agg_kind,
+								  do_aggregate);
 		}
 
 		/* Can't do anything else if outer path needs to be unique'd */
@@ -1504,7 +1892,7 @@ match_unsorted_outer(PlannerInfo *root,
 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
 								 save_jointype, extra, useallclauses,
 								 inner_cheapest_total, merge_pathkeys,
-								 false);
+								 false, agg_kind, do_aggregate);
 	}
 
 	/*
@@ -1525,7 +1913,8 @@ match_unsorted_outer(PlannerInfo *root,
 	{
 		if (nestjoinOK)
 			consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
-									   save_jointype, extra);
+									   save_jointype, extra, agg_kind,
+									   do_aggregate);
 
 		/*
 		 * If inner_cheapest_total is NULL or non parallel-safe then find the
@@ -1545,7 +1934,9 @@ match_unsorted_outer(PlannerInfo *root,
 		if (inner_cheapest_total)
 			consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
 										save_jointype, extra,
-										inner_cheapest_total);
+										inner_cheapest_total,
+										agg_kind,
+										do_aggregate);
 	}
 }
 
@@ -1568,7 +1959,9 @@ consider_parallel_mergejoin(PlannerInfo *root,
 							RelOptInfo *innerrel,
 							JoinType jointype,
 							JoinPathExtraData *extra,
-							Path *inner_cheapest_total)
+							Path *inner_cheapest_total,
+							RelAggKind agg_kind,
+							bool do_aggregate)
 {
 	ListCell   *lc1;
 
@@ -1586,7 +1979,8 @@ consider_parallel_mergejoin(PlannerInfo *root,
 
 		generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
 								 extra, false, inner_cheapest_total,
-								 merge_pathkeys, true);
+								 merge_pathkeys, true, agg_kind,
+								 do_aggregate);
 	}
 }
 
@@ -1607,7 +2001,9 @@ consider_parallel_nestloop(PlannerInfo *root,
 						   RelOptInfo *outerrel,
 						   RelOptInfo *innerrel,
 						   JoinType jointype,
-						   JoinPathExtraData *extra)
+						   JoinPathExtraData *extra,
+						   RelAggKind agg_kind,
+						   bool do_aggregate)
 {
 	JoinType	save_jointype = jointype;
 	ListCell   *lc1;
@@ -1657,7 +2053,8 @@ consider_parallel_nestloop(PlannerInfo *root,
 			}
 
 			try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
-									  pathkeys, jointype, extra);
+									  pathkeys, jointype, extra, agg_kind,
+									  do_aggregate);
 		}
 	}
 }
@@ -1672,6 +2069,7 @@ consider_parallel_nestloop(PlannerInfo *root,
  * 'innerrel' is the inner join relation
  * 'jointype' is the type of join to do
  * 'extra' contains additional input values
+ * 'agg_kind' and 'do_aggregate' tell if/how to aggregate the join output.
  */
 static void
 hash_inner_and_outer(PlannerInfo *root,
@@ -1679,7 +2077,9 @@ hash_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
-					 JoinPathExtraData *extra)
+					 JoinPathExtraData *extra,
+					 RelAggKind agg_kind,
+					 bool do_aggregate)
 {
 	JoinType	save_jointype = jointype;
 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
@@ -1754,7 +2154,9 @@ hash_inner_and_outer(PlannerInfo *root,
 							  cheapest_total_inner,
 							  hashclauses,
 							  jointype,
-							  extra);
+							  extra,
+							  agg_kind,
+							  do_aggregate);
 			/* no possibility of cheap startup here */
 		}
 		else if (jointype == JOIN_UNIQUE_INNER)
@@ -1770,7 +2172,9 @@ hash_inner_and_outer(PlannerInfo *root,
 							  cheapest_total_inner,
 							  hashclauses,
 							  jointype,
-							  extra);
+							  extra,
+							  agg_kind,
+							  do_aggregate);
 			if (cheapest_startup_outer != NULL &&
 				cheapest_startup_outer != cheapest_total_outer)
 				try_hashjoin_path(root,
@@ -1779,7 +2183,9 @@ hash_inner_and_outer(PlannerInfo *root,
 								  cheapest_total_inner,
 								  hashclauses,
 								  jointype,
-								  extra);
+								  extra,
+								  agg_kind,
+								  do_aggregate);
 		}
 		else
 		{
@@ -1800,7 +2206,9 @@ hash_inner_and_outer(PlannerInfo *root,
 								  cheapest_total_inner,
 								  hashclauses,
 								  jointype,
-								  extra);
+								  extra,
+								  agg_kind,
+								  do_aggregate);
 
 			foreach(lc1, outerrel->cheapest_parameterized_paths)
 			{
@@ -1834,7 +2242,9 @@ hash_inner_and_outer(PlannerInfo *root,
 									  innerpath,
 									  hashclauses,
 									  jointype,
-									  extra);
+									  extra,
+									  agg_kind,
+									  do_aggregate);
 				}
 			}
 		}
@@ -1877,7 +2287,9 @@ hash_inner_and_outer(PlannerInfo *root,
 										  cheapest_partial_outer,
 										  cheapest_partial_inner,
 										  hashclauses, jointype, extra,
-										  true /* parallel_hash */ );
+										  true /* parallel_hash */ ,
+										  agg_kind,
+										  do_aggregate);
 			}
 
 			/*
@@ -1898,7 +2310,9 @@ hash_inner_and_outer(PlannerInfo *root,
 										  cheapest_partial_outer,
 										  cheapest_safe_inner,
 										  hashclauses, jointype, extra,
-										  false /* parallel_hash */ );
+										  false /* parallel_hash */ ,
+										  agg_kind,
+										  do_aggregate);
 		}
 	}
 }
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 7008e1318e..78b1950a84 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -16,13 +16,16 @@
 
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/prep.h"
+#include "optimizer/tlist.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/selfuncs.h"
 
 
 static void make_rels_by_clause_joins(PlannerInfo *root,
@@ -31,23 +34,36 @@ static void make_rels_by_clause_joins(PlannerInfo *root,
 static void make_rels_by_clauseless_joins(PlannerInfo *root,
 							  RelOptInfo *old_rel,
 							  ListCell *other_rels);
+static void set_grouped_joinrel_target(PlannerInfo *root, RelOptInfo *joinrel,
+						   RelOptInfo *rel1, RelOptInfo *rel2,
+						   SpecialJoinInfo *sjinfo, List *restrictlist,
+						   RelAggInfo *agg_info, RelAggKind agg_kind);
 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
 static bool is_dummy_rel(RelOptInfo *rel);
 static bool restriction_is_constant_false(List *restrictlist,
 							  RelOptInfo *joinrel,
 							  bool only_pushed_down);
+static RelOptInfo *make_join_rel_common(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+					 RelAggInfo *agg_info, RelAggKind agg_kind,
+					 bool do_aggregate);
+static void make_join_rel_common_grouped(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+							 RelAggInfo *agg_info, RelAggKind agg_kind,
+							 bool do_aggregate);
 static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 							RelOptInfo *rel2, RelOptInfo *joinrel,
-							SpecialJoinInfo *sjinfo, List *restrictlist);
-static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
-					   RelOptInfo *rel2, RelOptInfo *joinrel,
-					   SpecialJoinInfo *parent_sjinfo,
-					   List *parent_restrictlist);
+							SpecialJoinInfo *sjinfo, List *restrictlist,
+							RelAggKind agg_kind,
+							bool do_aggregate);
+static void try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1,
+						RelOptInfo *rel2, RelOptInfo *joinrel,
+						SpecialJoinInfo *parent_sjinfo,
+						List *parent_restrictlist,
+						RelAggKind agg_kind,
+						bool do_aggregate);
 static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
 							 bool strict_op);
 
-
 /*
  * join_search_one_level
  *	  Consider ways to produce join relations containing exactly 'level'
@@ -322,6 +338,63 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
 	}
 }
 
+/*
+ * Set joinrel's reltarget according to agg_info and estimate the number of
+ * rows.
+ */
+static void
+set_grouped_joinrel_target(PlannerInfo *root, RelOptInfo *joinrel,
+						   RelOptInfo *rel1, RelOptInfo *rel2,
+						   SpecialJoinInfo *sjinfo, List *restrictlist,
+						   RelAggInfo *agg_info, RelAggKind agg_kind)
+{
+	PathTarget *target = NULL;
+
+	Assert(agg_info != NULL);
+
+	/*
+	 * build_join_rel() / build_child_join_rel() does not create the target
+	 * for grouped relation.
+	 */
+	Assert(joinrel->reltarget == NULL);
+	Assert(joinrel->agg_info == NULL);
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+		target = agg_info->target_simple;
+	else if (agg_kind == REL_AGG_KIND_PARTIAL)
+		target = agg_info->target_partial;
+	else
+		Assert(false);
+
+	/*
+	 * The output will actually be grouped, i.e. partially aggregated. No
+	 * additional processing needed.
+	 */
+	joinrel->reltarget = copy_pathtarget(target);
+
+	/*
+	 * The rest of agg_info will be needed at aggregation time.
+	 */
+	joinrel->agg_info = agg_info;
+
+	/*
+	 * Now that we have the target, compute the estimates.
+	 */
+	set_joinrel_size_estimates(root, joinrel, rel1, rel2, sjinfo,
+							   restrictlist);
+
+	/*
+	 * Grouping essentially changes the number of rows.
+	 *
+	 * XXX We do not distinguish whether two plain rels are joined and the
+	 * result is partially aggregated, or the partial aggregation has been
+	 * already applied to one of the input rels. Is this worth extra effort,
+	 * e.g. maintaining a separate RelOptInfo for each case (one difficulty
+	 * that would introduce is construction of AppendPath)?
+	 */
+	joinrel->rows = estimate_num_groups(root, joinrel->agg_info->group_exprs,
+										joinrel->rows, NULL);
+}
 
 /*
  * join_is_legal
@@ -651,32 +724,46 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 	return true;
 }
 
-
 /*
- * make_join_rel
+ * make_join_rel_common
  *	   Find or create a join RelOptInfo that represents the join of
  *	   the two given rels, and add to it path information for paths
  *	   created with the two rels as outer and inner rel.
  *	   (The join rel may already contain paths generated from other
  *	   pairs of rels that add up to the same set of base rels.)
  *
- * NB: will return NULL if attempted join is not valid.  This can happen
- * when working with outer joins, or with IN or EXISTS clauses that have been
- * turned into joins.
+ *	   'agg_info' contains the reltarget of grouped relation and everything we
+ *	   need to aggregate the join result. If NULL, then the join relation
+ *	   should not be grouped.
+ *
+ *	   'do_aggregate' tells that two non-grouped rels should be grouped and
+ *	   partial aggregation should be applied to all their paths.
+ *
+ * NB: will return NULL if attempted join is not valid.  This can happen when
+ * working with outer joins, or with IN or EXISTS clauses that have been
+ * turned into joins. NULL is also returned if caller is interested in a
+ * grouped relation but there's no useful grouped input relation.
  */
-RelOptInfo *
-make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
+static RelOptInfo *
+make_join_rel_common(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+					 RelAggInfo *agg_info, RelAggKind agg_kind,
+					 bool do_aggregate)
 {
 	Relids		joinrelids;
 	SpecialJoinInfo *sjinfo;
 	bool		reversed;
 	SpecialJoinInfo sjinfo_data;
-	RelOptInfo *joinrel;
+	RelOptInfo *joinrel,
+			   *joinrel_plain;
 	List	   *restrictlist;
+	bool		grouped = agg_info != NULL;
 
 	/* We should never try to join two overlapping sets of rels. */
 	Assert(!bms_overlap(rel1->relids, rel2->relids));
 
+	/* do_aggregate implies the output to be grouped. */
+	Assert(agg_kind == REL_AGG_KIND_NONE || grouped);
+
 	/* Construct Relids set that identifies the joinrel. */
 	joinrelids = bms_union(rel1->relids, rel2->relids);
 
@@ -725,8 +812,68 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	 * Find or build the join RelOptInfo, and compute the restrictlist that
 	 * goes with this particular joining.
 	 */
-	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
-							 &restrictlist);
+	joinrel = joinrel_plain = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
+											 &restrictlist, false);
+
+	if (grouped)
+	{
+		/*
+		 * Make sure there's a grouped join relation.
+		 */
+		if (agg_kind == REL_AGG_KIND_PARTIAL)
+		{
+			if (joinrel->grouped->needs_final_agg == NULL)
+				joinrel->grouped->needs_final_agg = build_join_rel(root,
+																   joinrelids,
+																   rel1,
+																   rel2,
+																   sjinfo,
+																   &restrictlist,
+																   true);
+
+			/*
+			 * The grouped join is what we need to return.
+			 */
+			joinrel = joinrel->grouped->needs_final_agg;
+		}
+		else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		{
+			if (joinrel->grouped->no_final_agg == NULL)
+				joinrel->grouped->no_final_agg = build_join_rel(root,
+																joinrelids,
+																rel1,
+																rel2,
+																sjinfo,
+																&restrictlist,
+																true);
+
+			/*
+			 * The grouped join is what we need to return.
+			 */
+			joinrel = joinrel->grouped->no_final_agg;
+		}
+		else
+			Assert(false);
+
+		/*
+		 * Make sure the grouped joinrel has reltarget initialized. Caller
+		 * should supply the target for group relation, so build_join_rel()
+		 * should have omitted its creation.
+		 *
+		 * The target can already be there if we already applied another
+		 * strategy to create grouped join.
+		 */
+		if (joinrel->reltarget == NULL)
+		{
+			set_grouped_joinrel_target(root, joinrel, rel1, rel2, sjinfo,
+									   restrictlist, agg_info, agg_kind);
+
+			if (rel1->consider_parallel && rel2->consider_parallel &&
+				is_parallel_safe(root, (Node *) restrictlist) &&
+				is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
+				joinrel->consider_parallel = true;
+		}
+	}
 
 	/*
 	 * If we've already proven this join is empty, we needn't consider any
@@ -738,15 +885,222 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		return joinrel;
 	}
 
-	/* Add paths to the join relation. */
-	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
-								restrictlist);
+	/*
+	 * Add paths to the join relation.
+	 *
+	 * Pass joinrel_plain and agg_kind instead of joinrel, since the function
+	 * needs agg_kind anyway.
+	 */
+	populate_joinrel_with_paths(root, rel1, rel2, joinrel_plain, sjinfo,
+								restrictlist, agg_kind, do_aggregate);
 
 	bms_free(joinrelids);
 
 	return joinrel;
 }
 
+static void
+make_join_rel_common_grouped(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+							 RelAggInfo *agg_info, RelAggKind agg_kind,
+							 bool do_aggregate)
+{
+	RelOptInfo *rel1_grouped = NULL;
+	RelOptInfo *rel2_grouped = NULL;
+	bool		rel1_grouped_useful = false;
+	bool		rel2_grouped_useful = false;
+
+	/*
+	 * Retrieve the grouped relations.
+	 *
+	 * A dummy rel indicates a join relation able to generate grouped paths as
+	 * such (i.e. it has valid agg_info), but for which the path actually
+	 * could not be created (e.g. only AGG_HASHED strategy was possible but
+	 * work_mem was not sufficient for hash table).
+	 */
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+	{
+		if (rel1->grouped && rel1->grouped->needs_final_agg)
+			rel1_grouped = rel1->grouped->needs_final_agg;
+
+		if (rel2->grouped && rel2->grouped->needs_final_agg)
+			rel2_grouped = rel2->grouped->needs_final_agg;
+	}
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		if (rel1->grouped && rel1->grouped->no_final_agg)
+			rel1_grouped = rel1->grouped->no_final_agg;
+
+		if (rel2->grouped && rel2->grouped->no_final_agg)
+			rel2_grouped = rel2->grouped->no_final_agg;
+	}
+	else
+		Assert(false);
+
+	rel1_grouped_useful = rel1_grouped != NULL && !IS_DUMMY_REL(rel1_grouped);
+	rel2_grouped_useful = rel2_grouped != NULL && !IS_DUMMY_REL(rel2_grouped);
+
+	/*
+	 * Nothing else to do?
+	 */
+	if (!rel1_grouped_useful && !rel2_grouped_useful)
+		return;
+
+	/*
+	 * At maximum one input rel can be grouped (here we don't care if any rel
+	 * is eventually dummy, the existence of grouped rel indicates that
+	 * aggregates can be pushed down to it). If both were grouped, then
+	 * grouping of one side would change the occurrence of the other side's
+	 * aggregate transient states on the input of the final aggregation. This
+	 * can be handled by adjusting the transient states, but it's not worth
+	 * the effort because it's hard to find a use case for this kind of join.
+	 *
+	 * XXX If the join of two grouped rels is implemented someday, note that
+	 * both rels can have aggregates, so it'd be hard to join grouped rel to
+	 * non-grouped here: 1) such a "mixed join" would require a special
+	 * target, 2) both AGGSPLIT_FINAL_DESERIAL and AGGSPLIT_SIMPLE aggregates
+	 * could appear in the target of the final aggregation node, originating
+	 * from the grouped and the non-grouped input rel respectively.
+	 */
+	if (rel1_grouped && rel2_grouped)
+		return;
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		/*
+		 * TODO return only if the join can duplicate values of grouping key
+		 * generated by the grouped relation.
+		 */
+		return;
+	}
+
+	if (rel1_grouped_useful)
+		make_join_rel_common(root, rel1_grouped, rel2, agg_info, agg_kind,
+							 do_aggregate);
+	else if (rel2_grouped_useful)
+		make_join_rel_common(root, rel1, rel2_grouped, agg_info, agg_kind,
+							 do_aggregate);
+}
+
+/*
+ * Front-end to make_join_rel_common(). Generates plain (non-grouped) join and
+ * then uses all the possible strategies to generate the grouped one.
+ */
+RelOptInfo *
+make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
+{
+	Relids		joinrelids;
+	RelAggInfo *agg_info;
+	RelOptInfo *joinrel;
+	double		nrows_plain;
+	RelOptInfo *result;
+
+	/* 1) form the plain join. */
+	result = make_join_rel_common(root, rel1, rel2, NULL, REL_AGG_KIND_NONE,
+								  false);
+
+	if (result == NULL)
+		return result;
+
+	nrows_plain = result->rows;
+
+	/*
+	 * We're done if there are no grouping expressions nor aggregates.
+	 */
+	if (root->grouped_var_list == NIL)
+		return result;
+
+	/*
+	 * If the same joinrel was already formed, just with the base rels divided
+	 * between rel1 and rel2 in a different way, we might already have the
+	 * matching agg_info.
+	 */
+	joinrelids = bms_union(rel1->relids, rel2->relids);
+	joinrel = find_join_rel(root, joinrelids);
+
+	/*
+	 * At the moment we know that non-grouped join exists, so it should have
+	 * been fetched.
+	 */
+	Assert(joinrel != NULL);
+
+	if (joinrel->grouped != NULL)
+	{
+		/*
+		 * RelOptGrouped should always have valid needs_final_agg.
+		 *
+		 * XXX Should RelOptGrouped also have the agg_info pointer, to make
+		 * access to it more straightforward?
+		 */
+		Assert(joinrel->grouped->needs_final_agg != NULL);
+		Assert(joinrel->grouped->needs_final_agg->agg_info != NULL);
+
+		agg_info = joinrel->grouped->needs_final_agg->agg_info;
+	}
+	else
+	{
+		double		nrows;
+
+		/*
+		 * agg_info must be created from scratch.
+		 */
+		agg_info = create_rel_agg_info(root, result);
+
+		/*
+		 * Grouping essentially changes the number of rows.
+		 */
+		if (agg_info != NULL)
+		{
+			nrows = estimate_num_groups(root,
+										agg_info->group_exprs,
+										nrows_plain,
+										NULL);
+			agg_info->rows = clamp_row_est(nrows);
+		}
+	}
+
+	/*
+	 * If the grouped join could not be built, we're done.
+	 */
+	if (agg_info == NULL)
+		return result;
+
+	/*
+	 * 2) join two plain rels and aggregate the join paths.
+	 */
+	result->grouped = (RelOptGrouped *) palloc0(sizeof(RelOptGrouped));
+	result->grouped->needs_final_agg = make_join_rel_common(root, rel1, rel2,
+															agg_info,
+															REL_AGG_KIND_PARTIAL,
+															true);
+
+	/*
+	 * If the non-grouped join relation could be built, its aggregated form
+	 * should exist too.
+	 */
+	Assert(result->grouped->needs_final_agg != NULL);
+
+	/*
+	 * Similarly for no_final_agg.
+	 */
+	result->grouped->no_final_agg = make_join_rel_common(root, rel1, rel2,
+														 agg_info,
+														 REL_AGG_KIND_SIMPLE,
+														 true);
+	Assert(result->grouped->no_final_agg != NULL);
+
+
+	/*
+	 * 3) combine plain and grouped relations in order to create both
+	 * needs_final_agg and no_final_agg join relations.
+	 */
+	make_join_rel_common_grouped(root, rel1, rel2, agg_info,
+								 REL_AGG_KIND_PARTIAL, false);
+	make_join_rel_common_grouped(root, rel1, rel2, agg_info,
+								 REL_AGG_KIND_SIMPLE, false);
+
+	return result;
+}
+
 /*
  * populate_joinrel_with_paths
  *	  Add paths to the given joinrel for given pair of joining relations. The
@@ -757,8 +1111,26 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 static void
 populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 							RelOptInfo *rel2, RelOptInfo *joinrel,
-							SpecialJoinInfo *sjinfo, List *restrictlist)
+							SpecialJoinInfo *sjinfo, List *restrictlist,
+							RelAggKind agg_kind, bool do_aggregate)
 {
+	RelOptInfo *joinrel_plain;
+
+	/*
+	 * joinrel_plain and agg_kind are passed to add_paths_to_joinrel() since
+	 * it needs agg_kind anyway.
+	 *
+	 * TODO As for the other uses, find out where joinrel can be used safely
+	 * instead of joinrel_plain, i.e. check that even grouped joinrel has all
+	 * the information needed.
+	 */
+	joinrel_plain = joinrel;
+
+	if (agg_kind == REL_AGG_KIND_PARTIAL)
+		joinrel = joinrel->grouped->needs_final_agg;
+	else if (agg_kind == REL_AGG_KIND_SIMPLE)
+		joinrel = joinrel->grouped->no_final_agg;
+
 	/*
 	 * Consider paths using each rel as both outer and inner.  Depending on
 	 * the join type, a provably empty outer or inner rel might mean the join
@@ -781,17 +1153,17 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 	{
 		case JOIN_INNER:
 			if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-				restriction_is_constant_false(restrictlist, joinrel, false))
+				restriction_is_constant_false(restrictlist, joinrel_plain, false))
 			{
 				mark_dummy_rel(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+			add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 								 JOIN_INNER, sjinfo,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 restrictlist, agg_kind, do_aggregate);
+			add_paths_to_joinrel(root, joinrel_plain, rel2, rel1,
 								 JOIN_INNER, sjinfo,
-								 restrictlist);
+								 restrictlist, agg_kind, do_aggregate);
 			break;
 		case JOIN_LEFT:
 			if (is_dummy_rel(rel1) ||
@@ -800,29 +1172,29 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 				mark_dummy_rel(joinrel);
 				break;
 			}
-			if (restriction_is_constant_false(restrictlist, joinrel, false) &&
+			if (restriction_is_constant_false(restrictlist, joinrel_plain, false) &&
 				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
 				mark_dummy_rel(rel2);
-			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+			add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 								 JOIN_LEFT, sjinfo,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 restrictlist, agg_kind, do_aggregate);
+			add_paths_to_joinrel(root, joinrel_plain, rel2, rel1,
 								 JOIN_RIGHT, sjinfo,
-								 restrictlist);
+								 restrictlist, agg_kind, do_aggregate);
 			break;
 		case JOIN_FULL:
 			if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
-				restriction_is_constant_false(restrictlist, joinrel, true))
+				restriction_is_constant_false(restrictlist, joinrel_plain, true))
 			{
 				mark_dummy_rel(joinrel);
 				break;
 			}
-			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+			add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 								 JOIN_FULL, sjinfo,
-								 restrictlist);
-			add_paths_to_joinrel(root, joinrel, rel2, rel1,
+								 restrictlist, agg_kind, do_aggregate);
+			add_paths_to_joinrel(root, joinrel_plain, rel2, rel1,
 								 JOIN_FULL, sjinfo,
-								 restrictlist);
+								 restrictlist, agg_kind, do_aggregate);
 
 			/*
 			 * If there are join quals that aren't mergeable or hashable, we
@@ -848,14 +1220,14 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 				bms_is_subset(sjinfo->min_righthand, rel2->relids))
 			{
 				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-					restriction_is_constant_false(restrictlist, joinrel, false))
+					restriction_is_constant_false(restrictlist, joinrel_plain, false))
 				{
 					mark_dummy_rel(joinrel);
 					break;
 				}
-				add_paths_to_joinrel(root, joinrel, rel1, rel2,
+				add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 									 JOIN_SEMI, sjinfo,
-									 restrictlist);
+									 restrictlist, agg_kind, do_aggregate);
 			}
 
 			/*
@@ -871,32 +1243,32 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 								   sjinfo) != NULL)
 			{
 				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-					restriction_is_constant_false(restrictlist, joinrel, false))
+					restriction_is_constant_false(restrictlist, joinrel_plain, false))
 				{
 					mark_dummy_rel(joinrel);
 					break;
 				}
-				add_paths_to_joinrel(root, joinrel, rel1, rel2,
+				add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 									 JOIN_UNIQUE_INNER, sjinfo,
-									 restrictlist);
-				add_paths_to_joinrel(root, joinrel, rel2, rel1,
+									 restrictlist, agg_kind, do_aggregate);
+				add_paths_to_joinrel(root, joinrel_plain, rel2, rel1,
 									 JOIN_UNIQUE_OUTER, sjinfo,
-									 restrictlist);
+									 restrictlist, agg_kind, do_aggregate);
 			}
 			break;
 		case JOIN_ANTI:
 			if (is_dummy_rel(rel1) ||
-				restriction_is_constant_false(restrictlist, joinrel, true))
+				restriction_is_constant_false(restrictlist, joinrel_plain, true))
 			{
 				mark_dummy_rel(joinrel);
 				break;
 			}
-			if (restriction_is_constant_false(restrictlist, joinrel, false) &&
+			if (restriction_is_constant_false(restrictlist, joinrel_plain, false) &&
 				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
 				mark_dummy_rel(rel2);
-			add_paths_to_joinrel(root, joinrel, rel1, rel2,
+			add_paths_to_joinrel(root, joinrel_plain, rel1, rel2,
 								 JOIN_ANTI, sjinfo,
-								 restrictlist);
+								 restrictlist, agg_kind, do_aggregate);
 			break;
 		default:
 			/* other values not expected here */
@@ -904,8 +1276,16 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 			break;
 	}
 
-	/* Apply partitionwise join technique, if possible. */
-	try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
+	/*
+	 * TODO Only allow per-child AGGSPLIT_SIMPLE if the partitioning allows
+	 * it, i.e. each partition generates distinct set of grouping keys.
+	 */
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+		return;
+
+	/* Apply partition-wise join technique, if possible. */
+	try_partition_wise_join(root, rel1, rel2, joinrel_plain, sjinfo, restrictlist,
+							agg_kind, do_aggregate);
 }
 
 
@@ -1232,7 +1612,8 @@ mark_dummy_rel(RelOptInfo *rel)
 
 	/* Set up the dummy path */
 	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
-											  0, false, NIL, -1));
+											  0, false, NIL, -1,
+											  REL_AGG_KIND_NONE));
 
 	/* Set or update cheapest_total_path and related fields */
 	set_cheapest(rel);
@@ -1308,16 +1689,16 @@ restriction_is_constant_false(List *restrictlist,
  * obtained by translating the respective parent join structures.
  */
 static void
-try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
-					   RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
-					   List *parent_restrictlist)
+try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+						RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
+						List *parent_restrictlist, RelAggKind agg_kind,
+						bool do_aggregate)
 {
 	int			nparts;
 	int			cnt_parts;
 
 	/* Guard against stack overflow due to overly deep partition hierarchy. */
 	check_stack_depth();
-
 	/* Nothing to do, if the join relation is not partitioned. */
 	if (!IS_PARTITIONED_REL(joinrel))
 		return;
@@ -1390,23 +1771,124 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			(List *) adjust_appendrel_attrs(root,
 											(Node *) parent_restrictlist,
 											nappinfos, appinfos);
-		pfree(appinfos);
 
 		child_joinrel = joinrel->part_rels[cnt_parts];
 		if (!child_joinrel)
 		{
-			child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
-												 joinrel, child_restrictlist,
-												 child_sjinfo,
-												 child_sjinfo->jointype);
-			joinrel->part_rels[cnt_parts] = child_joinrel;
+			if (agg_kind == REL_AGG_KIND_NONE)
+				child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
+													 joinrel,
+													 child_restrictlist,
+													 child_sjinfo,
+													 child_sjinfo->jointype,
+													 false);
+			else
+			{
+				/*
+				 * The join should have been created when we were called with
+				 * REL_AGG_KIND_NONE.
+				 */
+				child_joinrel = find_join_rel(root, bms_union(child_rel1->relids,
+															  child_rel2->relids));
+				Assert(child_joinrel);
+			}
 		}
 
+		if (agg_kind != REL_AGG_KIND_NONE)
+		{
+			RelOptInfo *joinrel_grouped,
+					   *child_joinrel_grouped;
+			RelAggInfo *child_agg_info;
+
+			if (child_joinrel->grouped == NULL)
+				child_joinrel->grouped = (RelOptGrouped *) palloc0(sizeof(RelOptGrouped));
+
+			/*
+			 * Make sure there's a grouped join relation.
+			 */
+			if (agg_kind == REL_AGG_KIND_PARTIAL)
+			{
+				joinrel_grouped = joinrel->grouped->needs_final_agg;
+
+				if (child_joinrel->grouped->needs_final_agg == NULL)
+					child_joinrel->grouped->needs_final_agg =
+						build_child_join_rel(root,
+											 child_rel1,
+											 child_rel2,
+											 joinrel_grouped,
+											 child_restrictlist,
+											 child_sjinfo,
+											 child_sjinfo->jointype,
+											 true);
+
+				/*
+				 * The grouped join is what we need till the end of the
+				 * function.
+				 */
+				child_joinrel_grouped = child_joinrel->grouped->needs_final_agg;
+			}
+			else if (agg_kind == REL_AGG_KIND_SIMPLE)
+			{
+				joinrel_grouped = joinrel->grouped->no_final_agg;
+
+				if (child_joinrel->grouped->no_final_agg == NULL)
+					child_joinrel->grouped->no_final_agg =
+						build_child_join_rel(root, child_rel1, child_rel2,
+											 joinrel_grouped,
+											 child_restrictlist,
+											 child_sjinfo,
+											 child_sjinfo->jointype,
+											 true);
+
+				/*
+				 * The grouped join is what we need till the end of the
+				 * function.
+				 */
+				child_joinrel_grouped = child_joinrel->grouped->no_final_agg;
+			}
+			else
+				Assert(false);
+
+			/*
+			 * Translate the parent's agg_info for use by this child join.
+			 *
+			 * Although build_child_join_rel() creates reltarget for each
+			 * child join from scratch as opposed to translating the parent
+			 * reltarget (XXX set_append_rel_size() uses the translation ---
+			 * is this inconsistency justified?), we just translate the parent
+			 * reltarget here. A per-child call of create_rel_agg_info() would
+			 * introduce too much duplicate work because it needs the *parent*
+			 * target as a source and that one is identical for all the child
+			 * joins.
+			 */
+			child_agg_info = translate_rel_agg_info(root,
+													joinrel_grouped->agg_info,
+													appinfos, nappinfos);
+
+			/*
+			 * Make sure the child joinrel has reltarget initialized.
+			 */
+			if (child_joinrel_grouped->reltarget == NULL)
+			{
+				set_grouped_joinrel_target(root, child_joinrel_grouped, rel1, rel2,
+										   child_sjinfo, child_restrictlist,
+										   child_agg_info, agg_kind);
+			}
+
+			joinrel_grouped->part_rels[cnt_parts] = child_joinrel_grouped;
+		}
+		else
+			joinrel->part_rels[cnt_parts] = child_joinrel;
+
+		pfree(appinfos);
+
 		Assert(bms_equal(child_joinrel->relids, child_joinrelids));
 
 		populate_joinrel_with_paths(root, child_rel1, child_rel2,
 									child_joinrel, child_sjinfo,
-									child_restrictlist);
+									child_restrictlist,
+									agg_kind,
+									do_aggregate);
 	}
 }
 
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 3bb5b8def6..0a0d22d427 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -250,10 +250,11 @@ TidQualFromBaseRestrictinfo(RelOptInfo *rel)
  *	  Candidate paths are added to the rel's pathlist (using add_path).
  */
 void
-create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
+create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel, RelAggKind agg_kind)
 {
 	Relids		required_outer;
 	List	   *tidquals;
+	Path	   *tidpath;
 
 	/*
 	 * We don't support pushing join clauses into the quals of a tidscan, but
@@ -263,8 +264,21 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
 	required_outer = rel->lateral_relids;
 
 	tidquals = TidQualFromBaseRestrictinfo(rel);
+	if (!tidquals)
+		return;
 
-	if (tidquals)
-		add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals,
-												   required_outer));
+	tidpath = (Path *) create_tidscan_path(root, rel, tidquals,
+										   required_outer);
+
+	if (agg_kind == REL_AGG_KIND_NONE)
+		add_path(rel, tidpath);
+	else if (required_outer == NULL)
+	{
+		/*
+		 * Only AGG_HASHED is suitable here as it does not expect the input
+		 * set to be sorted.
+		 */
+		create_grouped_path(root, rel, tidpath, false, false, AGG_HASHED,
+							agg_kind);
+	}
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cf82b7052d..26dec922d8 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -831,6 +831,12 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
 		return false;
 
 	/*
+	 * Grouped relation's target list contains GroupedVars.
+	 */
+	if (rel->agg_info != NULL)
+		return false;
+
+	/*
 	 * If a bitmap scan's tlist is empty, keep it as-is.  This may allow the
 	 * executor to skip heap page fetches, and in any case, the benefit of
 	 * using a physical tlist instead would be minimal.
@@ -1639,7 +1645,8 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags)
 	 * therefore can't predict whether it will require an exact tlist. For
 	 * both of these reasons, we have to recheck here.
 	 */
-	if (use_physical_tlist(root, &best_path->path, flags))
+	if (!best_path->force_result &&
+		use_physical_tlist(root, &best_path->path, flags))
 	{
 		/*
 		 * Our caller doesn't really care what tlist we return, so we don't
@@ -1652,7 +1659,8 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags)
 			apply_pathtarget_labeling_to_tlist(tlist,
 											   best_path->path.pathtarget);
 	}
-	else if (is_projection_capable_path(best_path->subpath))
+	else if (!best_path->force_result &&
+			 is_projection_capable_path(best_path->subpath))
 	{
 		/*
 		 * Our caller requires that we return the exact tlist, but no separate
@@ -5929,6 +5937,21 @@ find_ec_member_for_tle(EquivalenceClass *ec,
 	while (tlexpr && IsA(tlexpr, RelabelType))
 		tlexpr = ((RelabelType *) tlexpr)->arg;
 
+	/*
+	 * GroupedVar can contain either non-Var grouping expression or aggregate.
+	 * The grouping expression might be useful for sorting, however aggregates
+	 * shouldn't currently appear among pathkeys.
+	 */
+	if (IsA(tlexpr, GroupedVar))
+	{
+		GroupedVar *gvar = castNode(GroupedVar, tlexpr);
+
+		if (!IsA(gvar->gvexpr, Aggref))
+			tlexpr = gvar->gvexpr;
+		else
+			return NULL;
+	}
+
 	foreach(lc, ec->ec_members)
 	{
 		EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 01335db511..0740e3f18d 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -14,6 +14,7 @@
  */
 #include "postgres.h"
 
+#include "access/sysattr.h"
 #include "catalog/pg_type.h"
 #include "catalog/pg_class.h"
 #include "nodes/nodeFuncs.h"
@@ -27,6 +28,7 @@
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "rewrite/rewriteManip.h"
@@ -46,6 +48,10 @@ typedef struct PostponedQual
 } PostponedQual;
 
 
+static void create_aggregate_grouped_var_infos(PlannerInfo *root);
+static void create_grouping_expr_grouped_var_infos(PlannerInfo *root);
+static RelOptInfo *copy_simple_rel(PlannerInfo *root, RelOptInfo *rel,
+				RelAggKind agg_kind);
 static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel,
 						   Index rtindex);
 static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
@@ -96,10 +102,9 @@ static void check_hashjoinable(RestrictInfo *restrictinfo);
  * jtnode.  Internally, the function recurses through the jointree.
  *
  * At the end of this process, there should be one baserel RelOptInfo for
- * every non-join RTE that is used in the query.  Therefore, this routine
- * is the only place that should call build_simple_rel with reloptkind
- * RELOPT_BASEREL.  (Note: build_simple_rel recurses internally to build
- * "other rel" RelOptInfos for the members of any appendrels we find here.)
+ * every non-grouped non-join RTE that is used in the query. (Note:
+ * build_simple_rel recurses internally to build "other rel" RelOptInfos for
+ * the members of any appendrels we find here.)
  */
 void
 add_base_rels_to_query(PlannerInfo *root, Node *jtnode)
@@ -241,6 +246,456 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
 	}
 }
 
+/*
+ * Add GroupedVarInfo to grouped_var_list for each aggregate as well as for
+ * each possible grouping expression, and set up RelOptInfo for each base or
+ * 'other' relation that can produce grouped paths.
+ *
+ * Note that targets of the 'other' relations are not set here ---
+ * set_append_rel_size() will create them by translating the targets of the
+ * base rel.
+ *
+ * root->group_pathkeys must be set up before this function is called.
+ */
+extern void
+add_grouped_base_rels_to_query(PlannerInfo *root)
+{
+	int			i;
+	ListCell   *lc;
+
+	/*
+	 * Isn't user interested in the aggregate push-down feature?
+	 */
+	if (!enable_agg_pushdown)
+		return;
+
+	/* No grouping in the query? */
+	if (!root->parse->groupClause)
+		return;
+
+	/*
+	 * Grouping sets require multiple different groupings but the base
+	 * relation can only generate one.
+	 */
+	if (root->parse->groupingSets)
+		return;
+
+	/*
+	 * SRF is not allowed in the aggregate argument and we don't even want it
+	 * in the GROUP BY clause, so forbid it in general. It needs to be
+	 * analyzed if evaluation of a GROUP BY clause containing SRF below the
+	 * query targetlist would be correct. Currently it does not seem to be an
+	 * important use case.
+	 */
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	/*
+	 * TODO Consider if this is a real limitation.
+	 */
+	if (root->parse->hasWindowFuncs)
+		return;
+
+	/* Create GroupedVarInfo per (distinct) aggregate. */
+	create_aggregate_grouped_var_infos(root);
+
+	/* Isn't there any aggregate to be pushed down? */
+	if (root->grouped_var_list == NIL)
+		return;
+
+	/* Create GroupedVarInfo per grouping expression. */
+	create_grouping_expr_grouped_var_infos(root);
+
+	/*
+	 * Are all the aggregates AGGSPLIT_SIMPLE?
+	 */
+	if (root->grouped_var_list == NIL)
+		return;
+
+	/*
+	 * Now that we know that grouping can be pushed down, search for the
+	 * maximum sortgroupref. The base relations may need it if extra grouping
+	 * expressions get added to them.
+	 */
+	Assert(root->max_sortgroupref == 0);
+	foreach(lc, root->processed_tlist)
+	{
+		TargetEntry *te = lfirst_node(TargetEntry, lc);
+
+		if (te->ressortgroupref > root->max_sortgroupref)
+			root->max_sortgroupref = te->ressortgroupref;
+	}
+
+	/* Process the individual base relations. */
+	for (i = 1; i < root->simple_rel_array_size; i++)
+	{
+		RelOptInfo *rel = root->simple_rel_array[i];
+		RangeTblEntry *rte;
+		RelAggInfo *agg_info;
+
+		/* NULL should mean a join relation. */
+		if (rel == NULL)
+			continue;
+
+		/*
+		 * Not all RTE kinds are supported when grouping is considered.
+		 *
+		 * TODO Consider relaxing some of these restrictions.
+		 */
+		rte = root->simple_rte_array[rel->relid];
+		if (rte->rtekind != RTE_RELATION ||
+			rte->relkind == RELKIND_FOREIGN_TABLE ||
+			rte->tablesample != NULL)
+			return;
+
+		/*
+		 * Grouped "other member rels" should not be created until we know
+		 * whether the parent can be grouped, i.e. until the parent has
+		 * rel->agg_info initialized.
+		 */
+		if (rel->reloptkind != RELOPT_BASEREL)
+			continue;
+
+		/*
+		 * Retrieve the information we need for aggregation of the rel
+		 * contents.
+		 */
+		Assert(rel->agg_info == NULL);
+		agg_info = create_rel_agg_info(root, rel);
+		if (agg_info == NULL)
+			continue;
+
+		/*
+		 * Create the grouped counterpart of "rel". This may include the
+		 * "other member rels" rejected above, if they're children of this
+		 * rel. (The child rels will have their ->target and ->agg_info
+		 * initialized later by set_append_rel_size()).
+		 */
+		Assert(rel->agg_info == NULL);
+		Assert(rel->grouped == NULL);
+		rel->grouped = (RelOptGrouped *) palloc0(sizeof(RelOptGrouped));
+		rel->grouped->needs_final_agg = copy_simple_rel(root, rel,
+														REL_AGG_KIND_PARTIAL);
+		rel->grouped->no_final_agg = copy_simple_rel(root, rel,
+													 REL_AGG_KIND_SIMPLE);
+
+		/*
+		 * Assign it the aggregation-specific info.
+		 *
+		 * The aggregation paths will get their input target from agg_info, so
+		 * store it too.
+		 */
+		rel->grouped->needs_final_agg->reltarget = agg_info->target_partial;
+		rel->grouped->needs_final_agg->agg_info = agg_info;
+
+		rel->grouped->no_final_agg->reltarget = agg_info->target_simple;
+		rel->grouped->no_final_agg->agg_info = agg_info;
+	}
+}
+
+/*
+ * Create GroupedVarInfo for each distinct aggregate.
+ *
+ * If any aggregate is not suitable, set root->grouped_var_list to NIL and
+ * return.
+ */
+static void
+create_aggregate_grouped_var_infos(PlannerInfo *root)
+{
+	List	   *tlist_exprs;
+	ListCell   *lc;
+
+	Assert(root->grouped_var_list == NIL);
+
+	tlist_exprs = pull_var_clause((Node *) root->processed_tlist,
+								  PVC_INCLUDE_AGGREGATES);
+
+	/*
+	 * Although GroupingFunc is related to root->parse->groupingSets, this
+	 * field does not necessarily reflect its presence.
+	 */
+	foreach(lc, tlist_exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+
+		if (IsA(expr, GroupingFunc))
+			return;
+	}
+
+	/*
+	 * Aggregates within the HAVING clause need to be processed in the same
+	 * way as those in the main targetlist.
+	 */
+	if (root->parse->havingQual != NULL)
+	{
+		List	   *having_exprs;
+
+		having_exprs = pull_var_clause((Node *) root->parse->havingQual,
+									   PVC_INCLUDE_AGGREGATES);
+		if (having_exprs != NIL)
+			tlist_exprs = list_concat(tlist_exprs, having_exprs);
+	}
+
+	if (tlist_exprs == NIL)
+		return;
+
+	/* tlist_exprs may also contain Vars, but we only need Aggrefs. */
+	foreach(lc, tlist_exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+		Aggref	   *aggref;
+		ListCell   *lc2;
+		GroupedVarInfo *gvi;
+		bool		exists;
+
+		if (IsA(expr, Var))
+			continue;
+
+		aggref = castNode(Aggref, expr);
+
+		/* TODO Think if (some of) these can be handled. */
+		if (aggref->aggvariadic ||
+			aggref->aggdirectargs || aggref->aggorder ||
+			aggref->aggdistinct || aggref->aggfilter)
+		{
+			/*
+			 * Partial aggregation is not useful if at least one aggregate
+			 * cannot be evaluated below the top-level join.
+			 *
+			 * XXX Is it worth freeing the GroupedVarInfos and their subtrees?
+			 */
+			root->grouped_var_list = NIL;
+			break;
+		}
+
+		/*
+		 * Aggregation push-down does not work w/o aggcombinefn. This field is
+		 * not mandatory, so check if this particular aggregate can handle
+		 * partial aggregation.
+		 */
+		if (!OidIsValid(aggref->aggcombinefn))
+		{
+			root->grouped_var_list = NIL;
+			break;
+		}
+
+		/* Does GroupedVarInfo for this aggregate already exist? */
+		exists = false;
+		foreach(lc2, root->grouped_var_list)
+		{
+			gvi = lfirst_node(GroupedVarInfo, lc2);
+
+			if (equal(expr, gvi->gvexpr))
+			{
+				exists = true;
+				break;
+			}
+		}
+
+		/* Construct a new GroupedVarInfo if one does not exist yet. */
+		if (!exists)
+		{
+			Relids		relids;
+
+			gvi = makeNode(GroupedVarInfo);
+			gvi->gvid = list_length(root->grouped_var_list);
+			gvi->gvexpr = (Expr *) copyObject(aggref);
+			gvi->agg_partial = copyObject(aggref);
+			mark_partial_aggref(gvi->agg_partial, AGGSPLIT_INITIAL_SERIAL);
+
+			/* Find out where the aggregate should be evaluated. */
+			relids = pull_varnos((Node *) aggref);
+			if (!bms_is_empty(relids))
+				gvi->gv_eval_at = relids;
+			else
+				gvi->gv_eval_at = NULL;
+
+			root->grouped_var_list = lappend(root->grouped_var_list, gvi);
+		}
+	}
+
+	list_free(tlist_exprs);
+}
+
+/*
+ * Create GroupedVarInfo for each expression usable as grouping key.
+ *
+ * In addition to the expressions of the query targetlist, group_pathkeys is
+ * also considered the source of grouping expressions. That increases the
+ * chance to get the relation output grouped.
+ */
+static void
+create_grouping_expr_grouped_var_infos(PlannerInfo *root)
+{
+	ListCell   *l1,
+			   *l2;
+	List	   *exprs = NIL;
+	List	   *sortgrouprefs = NIL;
+
+	/*
+	 * Make sure GroupedVarInfo exists for each expression usable as grouping
+	 * key.
+	 */
+	foreach(l1, root->parse->groupClause)
+	{
+		SortGroupClause *sgClause;
+		TargetEntry *te;
+		Index		sortgroupref;
+
+		sgClause = lfirst_node(SortGroupClause, l1);
+		te = get_sortgroupclause_tle(sgClause, root->processed_tlist);
+		sortgroupref = te->ressortgroupref;
+
+		if (sortgroupref == 0)
+			continue;
+
+		/*
+		 * Non-zero sortgroupref does not necessarily imply grouping
+		 * expression: data can also be sorted by aggregate.
+		 */
+		if (IsA(te->expr, Aggref))
+			continue;
+
+		exprs = lappend(exprs, te->expr);
+		sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
+	}
+
+	/*
+	 * Construct GroupedVarInfo for each expression.
+	 */
+	forboth(l1, exprs, l2, sortgrouprefs)
+	{
+		Expr	   *expr = (Expr *) lfirst(l1);
+		int			sortgroupref = lfirst_int(l2);
+		GroupedVarInfo *gvi = makeNode(GroupedVarInfo);
+
+		gvi->gvid = list_length(root->grouped_var_list);
+		gvi->gvexpr = (Expr *) copyObject(expr);
+		gvi->sortgroupref = sortgroupref;
+
+		/* Find out where the expression should be evaluated. */
+		gvi->gv_eval_at = pull_varnos((Node *) expr);
+
+		root->grouped_var_list = lappend(root->grouped_var_list, gvi);
+	}
+}
+
+/*
+ * Take a flat copy of already initialized RelOptInfo and process child rels
+ * recursively.
+ *
+ * Flat copy ensures that we do not miss any information that the non-grouped
+ * rel already contains. XXX Do we need to copy any Node field?
+ *
+ * Two calls are expected per relation: the first with agg_kind equal to
+ * REL_AGG_KIND_PARTIAL, the second with REL_AGG_KIND_SIMPLE.
+ *
+ * TODO The function only produces grouped rels, the name should reflect it
+ * (create_grouped_rel() ?).
+ */
+static RelOptInfo *
+copy_simple_rel(PlannerInfo *root, RelOptInfo *rel, RelAggKind agg_kind)
+{
+	Index		relid = rel->relid;
+	RangeTblEntry *rte;
+	ListCell   *l;
+	List	   *indexlist = NIL;
+	RelOptInfo *result;
+
+	result = makeNode(RelOptInfo);
+	memcpy(result, rel, sizeof(RelOptInfo));
+
+	/*
+	 * The new relation is grouped itself.
+	 */
+	result->grouped = NULL;
+
+	/*
+	 * The target to generate aggregation input will be initialized later.
+	 */
+	result->reltarget = NULL;
+
+	/*
+	 * Make sure that index paths have access to the parent rel's agg_info,
+	 * which is used to indicate that the rel should produce grouped paths.
+	 */
+	foreach(l, result->indexlist)
+	{
+		IndexOptInfo *src,
+				   *dst;
+
+		src = lfirst_node(IndexOptInfo, l);
+		dst = makeNode(IndexOptInfo);
+		memcpy(dst, src, sizeof(IndexOptInfo));
+
+		dst->rel = result;
+		indexlist = lappend(indexlist, dst);
+	}
+	result->indexlist = indexlist;
+
+	/*
+	 * This is very similar to child rel processing in build_simple_rel().
+	 */
+	rte = root->simple_rte_array[relid];
+	if (rte->inh)
+	{
+		int			nparts = rel->nparts;
+		int			cnt_parts = 0;
+
+		if (nparts > 0)
+			result->part_rels = (RelOptInfo **)
+				palloc(sizeof(RelOptInfo *) * nparts);
+
+		foreach(l, root->append_rel_list)
+		{
+			AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+			RelOptInfo *childrel;
+
+			/* append_rel_list contains all append rels; ignore others */
+			if (appinfo->parent_relid != relid)
+				continue;
+
+			/*
+			 * The non-grouped child rel must already exist.
+			 */
+			childrel = root->simple_rel_array[appinfo->child_relid];
+			Assert(childrel != NULL);
+
+			/*
+			 * Create the copies.
+			 */
+			Assert(childrel->agg_info == NULL);
+			if (agg_kind == REL_AGG_KIND_PARTIAL)
+			{
+				Assert(childrel->grouped == NULL);
+
+				childrel->grouped = (RelOptGrouped *) palloc0(sizeof(RelOptGrouped));
+				childrel->grouped->needs_final_agg = copy_simple_rel(root, childrel, agg_kind);
+			}
+			else if (agg_kind == REL_AGG_KIND_SIMPLE)
+			{
+				Assert(childrel->grouped != NULL);
+				Assert(childrel->grouped->no_final_agg == NULL);
+				childrel->grouped->no_final_agg = copy_simple_rel(root, childrel, agg_kind);
+			}
+			else
+				Assert(false);
+
+			/* Nothing more to do for an unpartitioned table. */
+			if (!rel->part_scheme)
+				continue;
+
+			Assert(cnt_parts < nparts);
+			result->part_rels[cnt_parts] = childrel;
+			cnt_parts++;
+		}
+
+		/* We should have seen all the child partitions. */
+		Assert(cnt_parts == nparts);
+	}
+
+	return result;
+}
 
 /*****************************************************************************
  *
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index b05adc70c4..0ca5d6ea0b 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -43,6 +43,8 @@
  *		(this is NOT necessarily root->parse->targetList!)
  * qp_callback is a function to compute query_pathkeys once it's safe to do so
  * qp_extra is optional extra data to pass to qp_callback
+ * *partially_grouped may receive a relation that contains a partial
+ *  aggregate anywhere in the join tree.
  *
  * Note: the PlannerInfo node also includes a query_pathkeys field, which
  * tells query_planner the sort order that is desired in the final output
@@ -66,6 +68,8 @@ query_planner(PlannerInfo *root, List *tlist,
 	 */
 	if (parse->jointree->fromlist == NIL)
 	{
+		RelOptInfo *final_rel;
+
 		/* We need a dummy joinrel to describe the empty set of baserels */
 		final_rel = build_empty_join_rel(root);
 
@@ -114,6 +118,7 @@ query_planner(PlannerInfo *root, List *tlist,
 	root->full_join_clauses = NIL;
 	root->join_info_list = NIL;
 	root->placeholder_list = NIL;
+	root->grouped_var_list = NIL;
 	root->fkey_list = NIL;
 	root->initial_rels = NIL;
 
@@ -232,6 +237,16 @@ query_planner(PlannerInfo *root, List *tlist,
 	extract_restriction_or_clauses(root);
 
 	/*
+	 * If the query result can be grouped, check if any grouping can be
+	 * performed below the top-level join. If so, setup root->grouped_var_list
+	 * and create RelOptInfo for base relations capable to do the grouping.
+	 *
+	 * The base relations should be fully initialized now, so that we have
+	 * enough info to decide whether grouping is possible.
+	 */
+	add_grouped_base_rels_to_query(root);
+
+	/*
 	 * We should now have size estimates for every actual table involved in
 	 * the query, and we also know which if any have been deleted from the
 	 * query by join removal; so we can compute total_table_pages.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fd45c9767d..da8ac3c2d1 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -133,9 +133,6 @@ static double get_number_of_groups(PlannerInfo *root,
 					 double path_rows,
 					 grouping_sets_data *gd,
 					 List *target_list);
-static Size estimate_hashagg_tablesize(Path *path,
-						   const AggClauseCosts *agg_costs,
-						   double dNumGroups);
 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
@@ -2044,6 +2041,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 												grouping_target_parallel_safe,
 												&agg_costs,
 												gset_data);
+
 			/* Fix things up if grouping_target contains SRFs */
 			if (parse->hasTargetSRFs)
 				adjust_paths_for_srfs(root, current_rel,
@@ -3640,40 +3638,6 @@ get_number_of_groups(PlannerInfo *root,
 }
 
 /*
- * estimate_hashagg_tablesize
- *	  estimate the number of bytes that a hash aggregate hashtable will
- *	  require based on the agg_costs, path width and dNumGroups.
- *
- * XXX this may be over-estimating the size now that hashagg knows to omit
- * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
- * grouping columns not in the hashed set are counted here even though hashagg
- * won't store them. Is this a problem?
- */
-static Size
-estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
-						   double dNumGroups)
-{
-	Size		hashentrysize;
-
-	/* Estimate per-hash-entry space at tuple width... */
-	hashentrysize = MAXALIGN(path->pathtarget->width) +
-		MAXALIGN(SizeofMinimalTupleHeader);
-
-	/* plus space for pass-by-ref transition values... */
-	hashentrysize += agg_costs->transitionSpace;
-	/* plus the per-hash-entry overhead */
-	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
-
-	/*
-	 * Note that this disregards the effect of fill-factor and growth policy
-	 * of the hash-table. That's probably ok, given default the default
-	 * fill-factor is relatively high. It'd be hard to meaningfully factor in
-	 * "double-in-size" growth policies here.
-	 */
-	return hashentrysize * dNumGroups;
-}
-
-/*
  * create_grouping_paths
  *
  * Build a new upperrel containing Paths for grouping and/or aggregation.
@@ -3720,6 +3684,7 @@ create_grouping_paths(PlannerInfo *root,
 	{
 		int			flags = 0;
 		GroupPathExtraData extra;
+		List	   *agg_pushdown_paths = NIL;
 
 		/*
 		 * Determine whether it's possible to perform sort-based
@@ -3787,6 +3752,39 @@ create_grouping_paths(PlannerInfo *root,
 		create_ordinary_grouping_paths(root, input_rel, grouped_rel,
 									   agg_costs, gd, &extra,
 									   &partially_grouped_rel);
+
+		/*
+		 * Process paths generated by aggregation push-down feature. These
+		 * have been produced due to REL_AGG_KIND_SIMPLE.
+		 */
+		if (input_rel->grouped && input_rel->grouped->no_final_agg)
+		{
+			RelOptInfo *agg_pushdown_rel;
+			ListCell   *lc;
+
+			agg_pushdown_rel = input_rel->grouped->no_final_agg;
+			agg_pushdown_paths = agg_pushdown_rel->pathlist;
+
+			/*
+			 * See create_grouped_path().
+			 */
+			Assert(agg_pushdown_rel->partial_pathlist == NIL);
+
+			foreach(lc, agg_pushdown_paths)
+			{
+				Path	   *path = (Path *) lfirst(lc);
+
+				/*
+				 * The REL_AGG_KIND_SIMPLE strategy currently turns append rel
+				 * into a dummy rel, see comment in set_append_rel_pathlist().
+				 * XXX Can we eliminate this situation earlier?
+				 */
+				if (IS_DUMMY_PATH(path))
+					continue;
+
+				add_path(grouped_rel, path);
+			}
+		}
 	}
 
 	set_cheapest(grouped_rel);
@@ -3912,7 +3910,8 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 							   0,
 							   false,
 							   NIL,
-							   -1);
+							   -1,
+							   REL_AGG_KIND_NONE);
 	}
 	else
 	{
@@ -3951,6 +3950,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	RelOptInfo *partially_grouped_rel = NULL;
 	double		dNumGroups;
 	PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
+	RelOptInfo *grouped_input_rel = NULL;
+	bool		agg_push_down_paths = false;
 
 	/*
 	 * If this is the topmost grouping relation or if the parent relation is
@@ -3983,20 +3984,39 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	}
 
 	/*
+	 * Process paths generated due to aggregation push-down feature.  The
+	 * REL_AGG_KIND_SIMPLE option is responsible for these.
+	 */
+	if (input_rel->grouped)
+		grouped_input_rel = input_rel->grouped->needs_final_agg;
+
+	/*
 	 * Before generating paths for grouped_rel, we first generate any possible
 	 * partially grouped paths; that way, later code can easily consider both
 	 * parallel and non-parallel approaches to grouping.
+	 *
+	 * Partially grouped paths may also result from aggregation push-down.
 	 */
+	if (grouped_input_rel != NULL)
+	{
+		Assert(enable_agg_pushdown);
+
+		if (grouped_input_rel->partial_pathlist != NIL ||
+			grouped_input_rel->pathlist != NIL)
+			agg_push_down_paths = true;
+	}
+
 	if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
 	{
 		bool		force_rel_creation;
 
 		/*
-		 * If we're doing partitionwise aggregation at this level, force
-		 * creation of a partially_grouped_rel so we can add partitionwise
-		 * paths to it.
+		 * If we're doing partitionwise aggregation at this level or if
+		 * aggregation push-down took place, force creation of a
+		 * partially_grouped_rel so we can add the related paths to it.
 		 */
-		force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
+		force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL ||
+							  agg_push_down_paths);
 
 		partially_grouped_rel =
 			create_partial_grouping_paths(root,
@@ -4005,6 +4025,44 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 										  gd,
 										  extra,
 										  force_rel_creation);
+
+		/*
+		 * Process paths resulting from aggregate push-down if there are some.
+		 *
+	 * This works independently of the "partitionwise features".
+		 */
+		if (agg_push_down_paths &&
+			extra->patype == PARTITIONWISE_AGGREGATE_NONE)
+		{
+			ListCell   *lc;
+
+			/*
+			 * Gather the partial paths resulting from aggregation push-down
+			 * separately because they have a different target: aggregates are
+			 * represented there by GroupedVars. The targets of Gather /
+			 * GatherMerge paths must take this into account.
+			 */
+			if (grouped_input_rel->partial_pathlist != NIL)
+				gather_grouping_paths(root, grouped_input_rel);
+
+
+			/*
+			 * If non-partial paths were generated above and / or the
+			 * aggregate push-down resulted in non-partial paths, just add
+			 * them all to partially_grouped_rel for common processing.
+			 *
+			 * The only difference is that the paths we add here have
+			 * GroupedVars in their pathtarget, while ones to be added to
+			 * pathlist of partially_grouped_rel above have Aggrefs. This
+			 * difference will be handled later by set_upper_references().
+			 */
+			foreach(lc, grouped_input_rel->pathlist)
+			{
+				Path	   *path = (Path *) lfirst(lc);
+
+				add_path(partially_grouped_rel, path);
+			}
+		}
 	}
 
 	/* Set out parameter. */
@@ -4029,10 +4087,14 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 
 	/* Gather any partially grouped partial paths. */
 	if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
-	{
 		gather_grouping_paths(root, partially_grouped_rel);
+
+	/*
+	 * The non-partial paths can come either from the Gather above or from
+	 * aggregate push-down.
+	 */
+	if (partially_grouped_rel && partially_grouped_rel->pathlist)
 		set_cheapest(partially_grouped_rel);
-	}
 
 	/*
 	 * Estimate number of groups.
@@ -6839,7 +6901,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		 */
 		rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL,
 													  NULL, 0, false, NIL,
-													  -1));
+													  -1, REL_AGG_KIND_NONE));
 		rel->partial_pathlist = NIL;
 		set_cheapest(rel);
 		Assert(IS_DUMMY_REL(rel));
@@ -6963,7 +7025,8 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 
 		/* Build new paths for this relation by appending child paths. */
 		if (live_children != NIL)
-			add_paths_to_append_rel(root, rel, live_children);
+			add_paths_to_append_rel(root, rel, live_children,
+									REL_AGG_KIND_NONE);
 	}
 
 	/*
@@ -7122,8 +7185,15 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
 	{
 		Assert(partially_grouped_live_children != NIL);
 
+		/*
+		 * This grouping is independent of the aggregate push-down feature,
+		 * which is the reason we pass REL_AGG_KIND_NONE.
+		 */
+		Assert(partially_grouped_rel->agg_info == NULL);
+
 		add_paths_to_append_rel(root, partially_grouped_rel,
-								partially_grouped_live_children);
+								partially_grouped_live_children,
+								REL_AGG_KIND_NONE);
 
 		/*
 		 * We need call set_cheapest, since the finalization step will use the
@@ -7138,7 +7208,12 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
 	{
 		Assert(grouped_live_children != NIL);
 
-		add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
+		/*
+		 * This grouping is independent of the aggregate push-down feature,
+		 * which is the reason we pass REL_AGG_KIND_NONE.
+		 */
+		add_paths_to_append_rel(root, grouped_rel, grouped_live_children,
+								REL_AGG_KIND_NONE);
 	}
 }
 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 69dd327f0c..5f2105a682 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -40,6 +40,7 @@ typedef struct
 	List	   *tlist;			/* underlying target list */
 	int			num_vars;		/* number of plain Var tlist entries */
 	bool		has_ph_vars;	/* are there PlaceHolderVar entries? */
+	bool		has_grp_vars;	/* are there GroupedVar entries? */
 	bool		has_non_vars;	/* are there other entries? */
 	bool		has_conv_whole_rows;	/* are there ConvertRowtypeExpr
 										 * entries encapsulating a whole-row
@@ -1739,9 +1740,74 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
 	indexed_tlist *subplan_itlist;
 	List	   *output_targetlist;
 	ListCell   *l;
+	List	   *sub_tlist_save = NIL;
+
+	if (root->grouped_var_list != NIL)
+	{
+		if (IsA(plan, Agg))
+		{
+			Agg		   *agg = (Agg *) plan;
+
+			if (agg->aggsplit == AGGSPLIT_FINAL_DESERIAL)
+			{
+				/*
+				 * convert_combining_aggrefs could have replaced some vars
+				 * with Aggref expressions representing the partial
+				 * aggregation. We need to restore the same Aggrefs in the
+				 * subplan targetlist, but this would break the subplan if
+				 * it's something other than the partial aggregation (i.e. the
+				 * partial aggregation takes place lower in the plan tree). So
+				 * we'll eventually need to restore the current
+				 * subplan->targetlist.
+				 */
+				if (!IsA(subplan, Agg))
+					sub_tlist_save = subplan->targetlist;
+#ifdef USE_ASSERT_CHECKING
+				else
+					Assert(((Agg *) subplan)->aggsplit == AGGSPLIT_INITIAL_SERIAL);
+#endif							/* USE_ASSERT_CHECKING */
+
+				/*
+				 * Restore the aggregate expressions that we might have
+				 * removed when planning for aggregation at base relation
+				 * level.
+				 */
+				subplan->targetlist =
+					replace_grouped_vars_with_aggrefs(root, subplan->targetlist);
+			}
+			else if (agg->aggsplit == AGGSPLIT_SIMPLE)
+			{
+				/*
+				 * Similarly, process paths generated due to
+				 * REL_AGG_KIND_SIMPLE.
+				 */
+				Assert(!IsA(subplan, Agg));
+
+				sub_tlist_save = subplan->targetlist;
+				subplan->targetlist =
+					replace_grouped_vars_with_aggrefs(root, subplan->targetlist);
+			}
+		}
+		else if (IsA(plan, Result))
+		{
+			/*
+			 * Result can contain Aggrefs that we need to convert.
+			 */
+			sub_tlist_save = subplan->targetlist;
+			subplan->targetlist =
+				replace_grouped_vars_with_aggrefs(root, subplan->targetlist);
+		}
+	}
 
 	subplan_itlist = build_tlist_index(subplan->targetlist);
 
+	/*
+	 * The replacement of GroupedVars by Aggrefs was only needed for the index
+	 * build.
+	 */
+	if (sub_tlist_save != NIL)
+		subplan->targetlist = sub_tlist_save;
+
 	output_targetlist = NIL;
 	foreach(l, plan->targetlist)
 	{
@@ -1996,6 +2062,7 @@ build_tlist_index(List *tlist)
 
 	itlist->tlist = tlist;
 	itlist->has_ph_vars = false;
+	itlist->has_grp_vars = false;
 	itlist->has_non_vars = false;
 	itlist->has_conv_whole_rows = false;
 
@@ -2016,6 +2083,8 @@ build_tlist_index(List *tlist)
 		}
 		else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
 			itlist->has_ph_vars = true;
+		else if (tle->expr && IsA(tle->expr, GroupedVar))
+			itlist->has_grp_vars = true;
 		else if (is_converted_whole_row_reference((Node *) tle->expr))
 			itlist->has_conv_whole_rows = true;
 		else
@@ -2299,6 +2368,31 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 		/* No referent found for Var */
 		elog(ERROR, "variable not found in subplan target lists");
 	}
+	if (IsA(node, GroupedVar))
+	{
+		GroupedVar *gvar = (GroupedVar *) node;
+
+		/* See if the GroupedVar has bubbled up from a lower plan node */
+		if (context->outer_itlist && context->outer_itlist->has_grp_vars)
+		{
+			newvar = search_indexed_tlist_for_non_var((Expr *) gvar,
+													  context->outer_itlist,
+													  OUTER_VAR);
+			if (newvar)
+				return (Node *) newvar;
+		}
+		if (context->inner_itlist && context->inner_itlist->has_grp_vars)
+		{
+			newvar = search_indexed_tlist_for_non_var((Expr *) gvar,
+													  context->inner_itlist,
+													  INNER_VAR);
+			if (newvar)
+				return (Node *) newvar;
+		}
+
+		/* No referent found for GroupedVar */
+		elog(ERROR, "grouped variable not found in subplan target lists");
+	}
 	if (IsA(node, PlaceHolderVar))
 	{
 		PlaceHolderVar *phv = (PlaceHolderVar *) node;
@@ -2461,7 +2555,8 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
 		/* If no match, just fall through to process it normally */
 	}
 	/* Try matching more complex expressions too, if tlist has any */
-	if (context->subplan_itlist->has_non_vars ||
+	if (context->subplan_itlist->has_grp_vars ||
+		context->subplan_itlist->has_non_vars ||
 		(context->subplan_itlist->has_conv_whole_rows &&
 		 is_converted_whole_row_reference(node)))
 	{
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index c3f46a26c3..daf3118810 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -911,6 +911,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
 	memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
 	subroot->processed_tlist = NIL;
+	subroot->max_sortgroupref = 0;
 	subroot->grouping_map = NULL;
 	subroot->minmax_aggs = NIL;
 	subroot->qual_security_level = 0;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 7d75e1eda9..1d4452b57f 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -656,7 +656,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	 * Append the child results together.
 	 */
 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-									   NULL, 0, false, NIL, -1);
+									   NULL, 0, false, NIL, -1,
+									   REL_AGG_KIND_NONE);
 
 	/*
 	 * For UNION ALL, we just need the Append path.  For UNION, need to add
@@ -712,7 +713,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		ppath = (Path *)
 			create_append_path(root, result_rel, NIL, partial_pathlist,
 							   NULL, parallel_workers, enable_parallel_append,
-							   NIL, -1);
+							   NIL, -1, REL_AGG_KIND_NONE);
 		ppath = (Path *)
 			create_gather_path(root, result_rel, ppath,
 							   result_rel->reltarget, NULL, NULL);
@@ -822,7 +823,8 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	 * Append the child results together.
 	 */
 	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-									   NULL, 0, false, NIL, -1);
+									   NULL, 0, false, NIL, -1,
+									   REL_AGG_KIND_NONE);
 
 	/* Identify the grouping semantics */
 	groupList = generate_setop_grouplist(op, tlist);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index dbf9adcdac..1b813ccc0a 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -27,6 +27,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
+/* TODO Remove this if create_grouped_path ends up in another module. */
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/parsetree.h"
@@ -57,7 +58,6 @@ static List *reparameterize_pathlist_by_child(PlannerInfo *root,
 								 List *pathlist,
 								 RelOptInfo *child_rel);
 
-
 /*****************************************************************************
  *		MISC. PATH UTILITIES
  *****************************************************************************/
@@ -243,6 +243,7 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 void
 set_cheapest(RelOptInfo *parent_rel)
 {
+	bool		grouped = parent_rel->agg_info != NULL;
 	Path	   *cheapest_startup_path;
 	Path	   *cheapest_total_path;
 	Path	   *best_param_path;
@@ -252,7 +253,22 @@ set_cheapest(RelOptInfo *parent_rel)
 	Assert(IsA(parent_rel, RelOptInfo));
 
 	if (parent_rel->pathlist == NIL)
-		elog(ERROR, "could not devise a query plan for the given query");
+	{
+		if (!grouped)
+			elog(ERROR, "could not devise a query plan for the given query");
+		else
+		{
+			/*
+			 * Creation of grouped paths is not guaranteed. Currently this
+			 * happens if REL_AGG_KIND_SIMPLE is applied to an append relation.
+			 */
+			if (IS_SIMPLE_REL(parent_rel) || IS_JOIN_REL(parent_rel))
+				mark_dummy_rel(parent_rel);
+			else
+				Assert(false);
+			return;
+		}
+	}
 
 	cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
 	parameterized_paths = NIL;
@@ -955,10 +971,15 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
 					Relids required_outer, int parallel_workers)
 {
 	Path	   *pathnode = makeNode(Path);
+	bool		grouped = rel->agg_info != NULL;
 
 	pathnode->pathtype = T_SeqScan;
 	pathnode->parent = rel;
-	pathnode->pathtarget = rel->reltarget;
+	/* For grouped relation only generate the aggregation input. */
+	if (!grouped)
+		pathnode->pathtarget = rel->reltarget;
+	else
+		pathnode->pathtarget = rel->agg_info->input;
 	pathnode->param_info = get_baserel_parampathinfo(root, rel,
 													 required_outer);
 	pathnode->parallel_aware = parallel_workers > 0 ? true : false;
@@ -1038,10 +1059,15 @@ create_index_path(PlannerInfo *root,
 	RelOptInfo *rel = index->rel;
 	List	   *indexquals,
 			   *indexqualcols;
+	bool		grouped = rel->agg_info != NULL;
 
 	pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
 	pathnode->path.parent = rel;
-	pathnode->path.pathtarget = rel->reltarget;
+	/* For grouped relation only generate the aggregation input. */
+	if (!grouped)
+		pathnode->path.pathtarget = rel->reltarget;
+	else
+		pathnode->path.pathtarget = rel->agg_info->input;
 	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
 														  required_outer);
 	pathnode->path.parallel_aware = false;
@@ -1189,10 +1215,15 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
 					Relids required_outer)
 {
 	TidPath    *pathnode = makeNode(TidPath);
+	bool		grouped = rel->agg_info != NULL;
 
 	pathnode->path.pathtype = T_TidScan;
 	pathnode->path.parent = rel;
-	pathnode->path.pathtarget = rel->reltarget;
+	/* For grouped relation only generate the aggregation input. */
+	if (!grouped)
+		pathnode->path.pathtarget = rel->reltarget;
+	else
+		pathnode->path.pathtarget = rel->agg_info->input;
 	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
 														  required_outer);
 	pathnode->path.parallel_aware = false;
@@ -1221,7 +1252,8 @@ create_append_path(PlannerInfo *root,
 				   List *subpaths, List *partial_subpaths,
 				   Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
-				   List *partitioned_rels, double rows)
+				   List *partitioned_rels, double rows,
+				   RelAggKind agg_kind)
 {
 	AppendPath *pathnode = makeNode(AppendPath);
 	ListCell   *l;
@@ -1229,8 +1261,24 @@ create_append_path(PlannerInfo *root,
 	Assert(!parallel_aware || parallel_workers > 0);
 
 	pathnode->path.pathtype = T_Append;
+
+	if (agg_kind == REL_AGG_KIND_NONE)
+		pathnode->path.pathtarget = rel->reltarget;
+	else
+	{
+		if (agg_kind == REL_AGG_KIND_SIMPLE)
+		{
+			rel = rel->grouped->no_final_agg;
+			pathnode->path.pathtarget = rel->agg_info->target_simple;
+		}
+		else if (agg_kind == REL_AGG_KIND_PARTIAL)
+		{
+			rel = rel->grouped->needs_final_agg;
+			pathnode->path.pathtarget = rel->agg_info->target_partial;
+		}
+	}
+
 	pathnode->path.parent = rel;
-	pathnode->path.pathtarget = rel->reltarget;
 
 	/*
 	 * When generating an Append path for a partitioned table, there may be
@@ -1341,11 +1389,13 @@ append_startup_cost_compare(const void *a, const void *b)
 /*
  * create_merge_append_path
  *	  Creates a path corresponding to a MergeAppend plan, returning the
- *	  pathnode.
+ *	  pathnode. target can be supplied by the caller. If NULL is passed, the field
+ *	  is set to rel->reltarget.
  */
 MergeAppendPath *
 create_merge_append_path(PlannerInfo *root,
 						 RelOptInfo *rel,
+						 PathTarget *target,
 						 List *subpaths,
 						 List *pathkeys,
 						 Relids required_outer,
@@ -1358,7 +1408,7 @@ create_merge_append_path(PlannerInfo *root,
 
 	pathnode->path.pathtype = T_MergeAppend;
 	pathnode->path.parent = rel;
-	pathnode->path.pathtarget = rel->reltarget;
+	pathnode->path.pathtarget = target ? target : rel->reltarget;
 	pathnode->path.param_info = get_appendrel_parampathinfo(rel,
 															required_outer);
 	pathnode->path.parallel_aware = false;
@@ -1528,7 +1578,9 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	MemoryContext oldcontext;
 	int			numCols;
 
-	/* Caller made a mistake if subpath isn't cheapest_total ... */
+	/*
+	 * Caller made a mistake if subpath isn't cheapest_total.
+	 */
 	Assert(subpath == rel->cheapest_total_path);
 	Assert(subpath->parent == rel);
 	/* ... or if SpecialJoinInfo is the wrong one */
@@ -2149,6 +2201,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
  *	  relations.
  *
  * 'joinrel' is the join relation.
+ * 'target' is the join path target
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_nestloop
  * 'extra' contains various information about the join
@@ -2163,6 +2216,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
 NestPath *
 create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
+					 PathTarget *target,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
 					 JoinPathExtraData *extra,
@@ -2203,7 +2257,7 @@ create_nestloop_path(PlannerInfo *root,
 
 	pathnode->path.pathtype = T_NestLoop;
 	pathnode->path.parent = joinrel;
-	pathnode->path.pathtarget = joinrel->reltarget;
+	pathnode->path.pathtarget = target;
 	pathnode->path.param_info =
 		get_joinrel_parampathinfo(root,
 								  joinrel,
@@ -2235,6 +2289,7 @@ create_nestloop_path(PlannerInfo *root,
  *	  two relations
  *
  * 'joinrel' is the join relation
+ * 'target' is the join path target
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_mergejoin
  * 'extra' contains various information about the join
@@ -2251,6 +2306,7 @@ create_nestloop_path(PlannerInfo *root,
 MergePath *
 create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
+					  PathTarget *target,
 					  JoinType jointype,
 					  JoinCostWorkspace *workspace,
 					  JoinPathExtraData *extra,
@@ -2267,7 +2323,7 @@ create_mergejoin_path(PlannerInfo *root,
 
 	pathnode->jpath.path.pathtype = T_MergeJoin;
 	pathnode->jpath.path.parent = joinrel;
-	pathnode->jpath.path.pathtarget = joinrel->reltarget;
+	pathnode->jpath.path.pathtarget = target;
 	pathnode->jpath.path.param_info =
 		get_joinrel_parampathinfo(root,
 								  joinrel,
@@ -2303,6 +2359,7 @@ create_mergejoin_path(PlannerInfo *root,
  *	  Creates a pathnode corresponding to a hash join between two relations.
  *
  * 'joinrel' is the join relation
+ * 'target' is the join path target
  * 'jointype' is the type of join required
  * 'workspace' is the result from initial_cost_hashjoin
  * 'extra' contains various information about the join
@@ -2317,6 +2374,7 @@ create_mergejoin_path(PlannerInfo *root,
 HashPath *
 create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
+					 PathTarget *target,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
 					 JoinPathExtraData *extra,
@@ -2331,7 +2389,7 @@ create_hashjoin_path(PlannerInfo *root,
 
 	pathnode->jpath.path.pathtype = T_HashJoin;
 	pathnode->jpath.path.parent = joinrel;
-	pathnode->jpath.path.pathtarget = joinrel->reltarget;
+	pathnode->jpath.path.pathtarget = target;
 	pathnode->jpath.path.param_info =
 		get_joinrel_parampathinfo(root,
 								  joinrel,
@@ -2413,8 +2471,8 @@ create_projection_path(PlannerInfo *root,
 	 * Note: in the latter case, create_projection_plan has to recheck our
 	 * conclusion; see comments therein.
 	 */
-	if (is_projection_capable_path(subpath) ||
-		equal(oldtarget->exprs, target->exprs))
+	if ((is_projection_capable_path(subpath) ||
+		 equal(oldtarget->exprs, target->exprs)))
 	{
 		/* No separate Result node needed */
 		pathnode->dummypp = true;
@@ -2799,8 +2857,7 @@ create_agg_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Agg;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -2833,6 +2890,188 @@ create_agg_path(PlannerInfo *root,
 }
 
 /*
+ * Apply AGG_SORTED aggregation path to subpath if it's suitably sorted.
+ *
+ * check_pathkeys can be passed FALSE if the function was already called for
+ * the given index --- since the target should not change, we can skip the
+ * check of sorting during subsequent calls.
+ *
+ * agg_info contains both aggregate and grouping expressions.
+ *
+ * NULL is returned if sorting of subpath output is not suitable.
+ */
+AggPath *
+create_agg_sorted_path(PlannerInfo *root, Path *subpath,
+					   bool check_pathkeys, double input_rows,
+					   RelAggKind agg_kind)
+{
+	RelOptInfo *rel;
+	Node	   *agg_exprs;
+	AggSplit	aggsplit;
+	AggClauseCosts agg_costs;
+	PathTarget *target;
+	double		dNumGroups;
+	AggPath    *result = NULL;
+	RelAggInfo *agg_info;
+
+	rel = subpath->parent;
+	agg_info = rel->agg_info;
+	Assert(agg_info != NULL);
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		aggsplit = AGGSPLIT_SIMPLE;
+		agg_exprs = (Node *) agg_info->agg_exprs_simple;
+		target = agg_info->target_simple;
+	}
+	else if (agg_kind == REL_AGG_KIND_PARTIAL)
+	{
+		aggsplit = AGGSPLIT_INITIAL_SERIAL;
+		agg_exprs = (Node *) agg_info->agg_exprs_partial;
+		target = agg_info->target_partial;
+	}
+	else
+		Assert(false);
+
+	if (subpath->pathkeys == NIL)
+		return NULL;
+
+	if (!grouping_is_sortable(root->parse->groupClause))
+		return NULL;
+
+	if (check_pathkeys)
+	{
+		ListCell   *lc1;
+		List	   *key_subset = NIL;
+
+		/*
+		 * Find all query pathkeys that our relation does affect.
+		 */
+		foreach(lc1, root->group_pathkeys)
+		{
+			PathKey    *gkey = castNode(PathKey, lfirst(lc1));
+			ListCell   *lc2;
+
+			foreach(lc2, subpath->pathkeys)
+			{
+				PathKey    *skey = castNode(PathKey, lfirst(lc2));
+
+				if (skey == gkey)
+				{
+					key_subset = lappend(key_subset, gkey);
+					break;
+				}
+			}
+		}
+
+		if (key_subset == NIL)
+			return NULL;
+
+		/* Check if AGG_SORTED is useful for the whole query.  */
+		if (!pathkeys_contained_in(key_subset, subpath->pathkeys))
+			return NULL;
+	}
+
+	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
+	get_agg_clause_costs(root, (Node *) agg_exprs, aggsplit, &agg_costs);
+
+	Assert(agg_info->group_exprs != NIL);
+	dNumGroups = estimate_num_groups(root, agg_info->group_exprs,
+									 input_rows, NULL);
+
+	Assert(agg_info->group_clauses != NIL);
+	result = create_agg_path(root, rel, subpath, target,
+							 AGG_SORTED, aggsplit,
+							 agg_info->group_clauses, NIL, &agg_costs,
+							 dNumGroups);
+
+	return result;
+}
+
+/*
+ * Apply AGG_HASHED aggregation to subpath.
+ *
+ * Arguments have the same meaning as those of create_agg_sorted_path.
+ */
+AggPath *
+create_agg_hashed_path(PlannerInfo *root, Path *subpath,
+					   double input_rows, RelAggKind agg_kind)
+{
+	RelOptInfo *rel;
+	bool		can_hash;
+	Node	   *agg_exprs;
+	AggSplit	aggsplit;
+	AggClauseCosts agg_costs;
+	PathTarget *target;
+	double		dNumGroups;
+	Size		hashaggtablesize;
+	Query	   *parse = root->parse;
+	AggPath    *result = NULL;
+	RelAggInfo *agg_info;
+
+	rel = subpath->parent;
+	agg_info = rel->agg_info;
+	Assert(agg_info != NULL);
+
+	if (agg_kind == REL_AGG_KIND_SIMPLE)
+	{
+		aggsplit = AGGSPLIT_SIMPLE;
+		agg_exprs = (Node *) agg_info->agg_exprs_simple;
+		target = agg_info->target_simple;
+	}
+	else if (agg_kind == REL_AGG_KIND_PARTIAL)
+	{
+		aggsplit = AGGSPLIT_INITIAL_SERIAL;
+		agg_exprs = (Node *) agg_info->agg_exprs_partial;
+		target = agg_info->target_partial;
+	}
+	else
+		Assert(false);
+
+	MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
+	get_agg_clause_costs(root, agg_exprs, aggsplit, &agg_costs);
+
+	can_hash = (parse->groupClause != NIL &&
+				parse->groupingSets == NIL &&
+				agg_costs.numOrderedAggs == 0 &&
+				grouping_is_hashable(parse->groupClause));
+
+	if (can_hash)
+	{
+		Assert(agg_info->group_exprs != NIL);
+		dNumGroups = estimate_num_groups(root, agg_info->group_exprs,
+										 input_rows, NULL);
+
+		hashaggtablesize = estimate_hashagg_tablesize(subpath, &agg_costs,
+													  dNumGroups);
+
+		if (hashaggtablesize < work_mem * 1024L)
+		{
+			/*
+			 * Create the partial aggregation path.
+			 */
+			Assert(agg_info->group_clauses != NIL);
+
+			result = create_agg_path(root, rel, subpath,
+									 target,
+									 AGG_HASHED,
+									 aggsplit,
+									 agg_info->group_clauses, NIL,
+									 &agg_costs,
+									 dNumGroups);
+
+			/*
+			 * The agg path should require no fewer parameters than the plain
+			 * one.
+			 */
+			result->path.param_info = subpath->param_info;
+		}
+	}
+
+	return result;
+}
+
+/*
  * create_groupingsets_path
  *	  Creates a pathnode that represents performing GROUPING SETS aggregation
  *
@@ -3512,7 +3751,7 @@ create_limit_path(PlannerInfo *root, RelOptInfo *rel,
 Path *
 reparameterize_path(PlannerInfo *root, Path *path,
 					Relids required_outer,
-					double loop_count)
+					double loop_count, RelAggKind agg_kind)
 {
 	RelOptInfo *rel = path->parent;
 
@@ -3580,7 +3819,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
 
 					spath = reparameterize_path(root, spath,
 												required_outer,
-												loop_count);
+												loop_count,
+												agg_kind);
 					if (spath == NULL)
 						return NULL;
 					/* We have to re-split the regular and partial paths */
@@ -3596,7 +3836,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
 									   apath->path.parallel_workers,
 									   apath->path.parallel_aware,
 									   apath->partitioned_rels,
-									   -1);
+									   -1,
+									   agg_kind);
 			}
 		default:
 			break;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index c69740eda6..114a3445db 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -17,6 +17,7 @@
 #include <limits.h>
 
 #include "miscadmin.h"
+#include "catalog/pg_constraint.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -26,6 +27,8 @@
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
+#include "optimizer/var.h"
+#include "parser/parse_oper.h"
 #include "partitioning/partbounds.h"
 #include "utils/hsearch.h"
 
@@ -57,6 +60,9 @@ static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
 static void build_joinrel_partition_info(RelOptInfo *joinrel,
 							 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 							 List *restrictlist, JoinType jointype);
+static void init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
+					  PathTarget *target, PathTarget *agg_input,
+					  List *gvis, List **group_exprs_extra_p);
 
 
 /*
@@ -72,7 +78,10 @@ setup_simple_rel_arrays(PlannerInfo *root)
 	/* Arrays are accessed using RT indexes (1..N) */
 	root->simple_rel_array_size = list_length(root->parse->rtable) + 1;
 
-	/* simple_rel_array is initialized to all NULLs */
+	/*
+	 * simple_rel_array / simple_grouped_rel_array are both initialized to all
+	 * NULLs
+	 */
 	root->simple_rel_array = (RelOptInfo **)
 		palloc0(root->simple_rel_array_size * sizeof(RelOptInfo *));
 
@@ -148,7 +157,14 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->reloptkind = parent ? RELOPT_OTHER_MEMBER_REL : RELOPT_BASEREL;
 	rel->relids = bms_make_singleton(relid);
 	rel->rows = 0;
-	/* cheap startup cost is interesting iff not all tuples to be retrieved */
+
+	/*
+	 * Cheap startup cost is interesting iff not all tuples to be retrieved.
+	 * XXX As for grouped relation, the startup cost might be interesting for
+	 * AGG_SORTED (if it can produce the ordering that matches
+	 * root->query_pathkeys) but not in general (other kinds of aggregation
+	 * need the whole relation). Yet it seems worth trying.
+	 */
 	rel->consider_startup = (root->tuple_fraction > 0);
 	rel->consider_param_startup = false;	/* might get changed later */
 	rel->consider_parallel = false; /* might get changed later */
@@ -162,6 +178,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->cheapest_parameterized_paths = NIL;
 	rel->direct_lateral_relids = NULL;
 	rel->lateral_relids = NULL;
+	rel->agg_info = NULL;
+	rel->grouped = NULL;
 	rel->relid = relid;
 	rel->rtekind = rte->rtekind;
 	/* min_attr, max_attr, attr_needed, attr_widths are set below */
@@ -380,13 +398,23 @@ build_join_rel_hash(PlannerInfo *root)
 RelOptInfo *
 find_join_rel(PlannerInfo *root, Relids relids)
 {
+	HTAB	   *join_rel_hash;
+	List	   *join_rel_list;
+
+	join_rel_hash = root->join_rel_hash;
+	join_rel_list = root->join_rel_list;
+
 	/*
 	 * Switch to using hash lookup when list grows "too long".  The threshold
 	 * is arbitrary and is known only here.
 	 */
-	if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
+	if (!join_rel_hash && list_length(join_rel_list) > 32)
+	{
 		build_join_rel_hash(root);
 
+		join_rel_hash = root->join_rel_hash;
+	}
+
 	/*
 	 * Use either hashtable lookup or linear search, as appropriate.
 	 *
@@ -395,12 +423,12 @@ find_join_rel(PlannerInfo *root, Relids relids)
 	 * so would force relids out of a register and thus probably slow down the
 	 * list-search case.
 	 */
-	if (root->join_rel_hash)
+	if (join_rel_hash)
 	{
 		Relids		hashkey = relids;
 		JoinHashEntry *hentry;
 
-		hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
+		hentry = (JoinHashEntry *) hash_search(join_rel_hash,
 											   &hashkey,
 											   HASH_FIND,
 											   NULL);
@@ -411,7 +439,7 @@ find_join_rel(PlannerInfo *root, Relids relids)
 	{
 		ListCell   *l;
 
-		foreach(l, root->join_rel_list)
+		foreach(l, join_rel_list)
 		{
 			RelOptInfo *rel = (RelOptInfo *) lfirst(l);
 
@@ -481,7 +509,9 @@ set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 static void
 add_join_rel(PlannerInfo *root, RelOptInfo *joinrel)
 {
-	/* GEQO requires us to append the new joinrel to the end of the list! */
+	/*
+	 * GEQO requires us to append the new joinrel to the end of the list!
+	 */
 	root->join_rel_list = lappend(root->join_rel_list, joinrel);
 
 	/* store it into the auxiliary hashtable if there is one. */
@@ -511,6 +541,9 @@ add_join_rel(PlannerInfo *root, RelOptInfo *joinrel)
  * 'restrictlist_ptr': result variable.  If not NULL, *restrictlist_ptr
  *		receives the list of RestrictInfo nodes that apply to this
  *		particular pair of joinable relations.
+ * 'grouped' forces creation of a "standalone" object, i.e.  w/o search in the
+ *		join list and without adding the result to the list. Caller is
+ *		responsible for setup of reltarget in such a case.
  *
  * restrictlist_ptr makes the routine's API a little grotty, but it saves
  * duplicated calculation of the restrictlist...
@@ -521,10 +554,12 @@ build_join_rel(PlannerInfo *root,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
-			   List **restrictlist_ptr)
+			   List **restrictlist_ptr,
+			   bool grouped)
 {
-	RelOptInfo *joinrel;
+	RelOptInfo *joinrel = NULL;
 	List	   *restrictlist;
+	bool		create_target = !grouped;
 
 	/* This function should be used only for join between parents. */
 	Assert(!IS_OTHER_REL(outer_rel) && !IS_OTHER_REL(inner_rel));
@@ -532,7 +567,8 @@ build_join_rel(PlannerInfo *root,
 	/*
 	 * See if we already have a joinrel for this set of base rels.
 	 */
-	joinrel = find_join_rel(root, joinrelids);
+	if (!grouped)
+		joinrel = find_join_rel(root, joinrelids);
 
 	if (joinrel)
 	{
@@ -555,11 +591,11 @@ build_join_rel(PlannerInfo *root,
 	joinrel->reloptkind = RELOPT_JOINREL;
 	joinrel->relids = bms_copy(joinrelids);
 	joinrel->rows = 0;
-	/* cheap startup cost is interesting iff not all tuples to be retrieved */
+	/* See the comment in build_simple_rel(). */
 	joinrel->consider_startup = (root->tuple_fraction > 0);
 	joinrel->consider_param_startup = false;
 	joinrel->consider_parallel = false;
-	joinrel->reltarget = create_empty_pathtarget();
+	joinrel->reltarget = NULL;
 	joinrel->pathlist = NIL;
 	joinrel->ppilist = NIL;
 	joinrel->partial_pathlist = NIL;
@@ -573,6 +609,8 @@ build_join_rel(PlannerInfo *root,
 				  inner_rel->direct_lateral_relids);
 	joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids,
 														outer_rel, inner_rel);
+	joinrel->agg_info = NULL;
+	joinrel->grouped = NULL;
 	joinrel->relid = 0;			/* indicates not a baserel */
 	joinrel->rtekind = RTE_JOIN;
 	joinrel->min_attr = 0;
@@ -623,9 +661,13 @@ build_join_rel(PlannerInfo *root,
 	 * and inner rels we first try to build it from.  But the contents should
 	 * be the same regardless.
 	 */
-	build_joinrel_tlist(root, joinrel, outer_rel);
-	build_joinrel_tlist(root, joinrel, inner_rel);
-	add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel);
+	if (create_target)
+	{
+		joinrel->reltarget = create_empty_pathtarget();
+		build_joinrel_tlist(root, joinrel, outer_rel);
+		build_joinrel_tlist(root, joinrel, inner_rel);
+		add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel);
+	}
 
 	/*
 	 * add_placeholders_to_joinrel also took care of adding the ph_lateral
@@ -662,31 +704,39 @@ build_join_rel(PlannerInfo *root,
 
 	/*
 	 * Set estimates of the joinrel's size.
-	 */
-	set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
-							   sjinfo, restrictlist);
-
-	/*
-	 * Set the consider_parallel flag if this joinrel could potentially be
-	 * scanned within a parallel worker.  If this flag is false for either
-	 * inner_rel or outer_rel, then it must be false for the joinrel also.
-	 * Even if both are true, there might be parallel-restricted expressions
-	 * in the targetlist or quals.
 	 *
-	 * Note that if there are more than two rels in this relation, they could
-	 * be divided between inner_rel and outer_rel in any arbitrary way.  We
-	 * assume this doesn't matter, because we should hit all the same baserels
-	 * and joinclauses while building up to this joinrel no matter which we
-	 * take; therefore, we should make the same decision here however we get
-	 * here.
+	 * XXX The function claims to need reltarget but it does not seem to
+	 * actually use it. Should we call it unconditionally so that callers of
+	 * build_join_rel() do not have to care?
 	 */
-	if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
-		is_parallel_safe(root, (Node *) restrictlist) &&
-		is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
-		joinrel->consider_parallel = true;
+	if (create_target)
+	{
+		set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
+								   sjinfo, restrictlist);
+
+		/*
+		 * Set the consider_parallel flag if this joinrel could potentially be
+		 * scanned within a parallel worker.  If this flag is false for either
+		 * inner_rel or outer_rel, then it must be false for the joinrel also.
+		 * Even if both are true, there might be parallel-restricted
+		 * expressions in the targetlist or quals.
+		 *
+		 * Note that if there are more than two rels in this relation, they
+		 * could be divided between inner_rel and outer_rel in any arbitrary
+		 * way.  We assume this doesn't matter, because we should hit all the
+		 * same baserels and joinclauses while building up to this joinrel no
+		 * matter which we take; therefore, we should make the same decision
+		 * here however we get here.
+		 */
+		if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
+			is_parallel_safe(root, (Node *) restrictlist) &&
+			is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
+			joinrel->consider_parallel = true;
+	}
 
 	/* Add the joinrel to the PlannerInfo. */
-	add_join_rel(root, joinrel);
+	if (!grouped)
+		add_join_rel(root, joinrel);
 
 	/*
 	 * Also, if dynamic-programming join search is active, add the new joinrel
@@ -694,7 +744,7 @@ build_join_rel(PlannerInfo *root,
 	 * of members should be for equality, but some of the level 1 rels might
 	 * have been joinrels already, so we can only assert <=.
 	 */
-	if (root->join_rel_level)
+	if (root->join_rel_level && !grouped)
 	{
 		Assert(root->join_cur_level > 0);
 		Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
@@ -718,16 +768,19 @@ build_join_rel(PlannerInfo *root,
  * 'restrictlist': list of RestrictInfo nodes that apply to this particular
  *		pair of joinable relations
  * 'jointype' is the join type (inner, left, full, etc)
+ * 'grouped': does the join contain partial aggregate? (If it does, then
+ * caller is responsible for setup of reltarget.)
  */
 RelOptInfo *
 build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 					 RelOptInfo *inner_rel, RelOptInfo *parent_joinrel,
 					 List *restrictlist, SpecialJoinInfo *sjinfo,
-					 JoinType jointype)
+					 JoinType jointype, bool grouped)
 {
 	RelOptInfo *joinrel = makeNode(RelOptInfo);
 	AppendRelInfo **appinfos;
 	int			nappinfos;
+	bool		create_target = !grouped;
 
 	/* Only joins between "other" relations land here. */
 	Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
@@ -735,11 +788,11 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->reloptkind = RELOPT_OTHER_JOINREL;
 	joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
 	joinrel->rows = 0;
-	/* cheap startup cost is interesting iff not all tuples to be retrieved */
+	/* See the comment in build_simple_rel(). */
 	joinrel->consider_startup = (root->tuple_fraction > 0);
 	joinrel->consider_param_startup = false;
 	joinrel->consider_parallel = false;
-	joinrel->reltarget = create_empty_pathtarget();
+	joinrel->reltarget = NULL;
 	joinrel->pathlist = NIL;
 	joinrel->ppilist = NIL;
 	joinrel->partial_pathlist = NIL;
@@ -749,6 +802,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->cheapest_parameterized_paths = NIL;
 	joinrel->direct_lateral_relids = NULL;
 	joinrel->lateral_relids = NULL;
+	joinrel->agg_info = NULL;
+	joinrel->grouped = NULL;
 	joinrel->relid = 0;			/* indicates not a baserel */
 	joinrel->rtekind = RTE_JOIN;
 	joinrel->min_attr = 0;
@@ -789,11 +844,15 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	/* Compute information relevant to foreign relations. */
 	set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
 
-	/* Build targetlist */
-	build_joinrel_tlist(root, joinrel, outer_rel);
-	build_joinrel_tlist(root, joinrel, inner_rel);
-	/* Add placeholder variables. */
-	add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+	if (create_target)
+	{
+		/* Build targetlist */
+		joinrel->reltarget = create_empty_pathtarget();
+		build_joinrel_tlist(root, joinrel, outer_rel);
+		build_joinrel_tlist(root, joinrel, inner_rel);
+		/* Add placeholder variables. */
+		add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+	}
 
 	/* Construct joininfo list. */
 	appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
@@ -801,7 +860,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 														(Node *) parent_joinrel->joininfo,
 														nappinfos,
 														appinfos);
-	pfree(appinfos);
 
 	/*
 	 * Lateral relids referred in child join will be same as that referred in
@@ -828,14 +886,22 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 
 
 	/* Set estimates of the child-joinrel's size. */
-	set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
-							   sjinfo, restrictlist);
+	/* XXX See the corresponding comment in build_join_rel(). */
+	if (create_target)
+		set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
+								   sjinfo, restrictlist);
 
-	/* We build the join only once. */
-	Assert(!find_join_rel(root, joinrel->relids));
+	/*
+	 * We build the join only once. (Grouped joins should not exist in the
+	 * list.)
+	 */
+	Assert(!find_join_rel(root, joinrel->relids) || grouped);
 
 	/* Add the relation to the PlannerInfo. */
-	add_join_rel(root, joinrel);
+	if (!grouped)
+		add_join_rel(root, joinrel);
+
+	pfree(appinfos);
 
 	return joinrel;
 }
@@ -1768,3 +1834,662 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 		joinrel->nullable_partexprs[cnt] = nullable_partexpr;
 	}
 }
+
+/*
+ * Check if the relation can produce grouped paths and return the information
+ * it'll need for it. The passed relation is the non-grouped one which has the
+ * reltarget already constructed.
+ */
+RelAggInfo *
+create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel)
+{
+	List	   *gvis;
+	List	   *aggregates = NIL;
+	List	   *grp_exprs = NIL;
+	bool		found_higher_agg;
+	ListCell   *lc;
+	RelAggInfo *result;
+	PathTarget *target_partial,
+			   *target_simple,
+			   *agg_input;
+	List	   *exprs_tmp;
+	List	   *grp_exprs_extra = NIL;
+	int			i;
+	List	   *sortgroupclauses = NIL;
+
+	/*
+	 * The function shouldn't have been called if there's no opportunity for
+	 * aggregation push-down.
+	 */
+	Assert(root->grouped_var_list != NIL);
+
+	/*
+	 * The source relation has nothing to do with grouping.
+	 */
+	Assert(rel->agg_info == NULL);
+
+	/*
+	 * The current implementation of aggregation push-down cannot handle
+	 * PlaceHolderVar (PHV).
+	 *
+	 * If we knew that the PHV should be evaluated in this target (and of
+	 * course, if its expression matched some grouping expression or Aggref
+	 * argument), we'd just let init_grouping_targets create GroupedVar for
+	 * the corresponding expression (phexpr). On the other hand, if we knew
+	 * that the PHV is evaluated below the current rel, we'd ignore it because
+	 * the referencing GroupedVar would take care of propagation of the value
+	 * to upper joins. (PHV whose ph_eval_at is above the current rel make the
+	 * aggregation push-down impossible in any case because the partial
+	 * aggregation would receive wrong input if we ignored the ph_eval_at.)
+	 *
+	 * The problem is that the same PHV can be evaluated in the target of the
+	 * current rel or in that of lower rel --- depending on the input paths.
+	 * For example, consider rel->relids = {A, B, C} and if ph_eval_at = {B,
+	 * C}. Path "A JOIN (B JOIN C)" implies that the PHV is evaluated by the
+	 * "(B JOIN C)", while path "(A JOIN B) JOIN C" evaluates the PHV itself.
+	 */
+	foreach(lc, rel->reltarget->exprs)
+	{
+		Expr	   *expr = lfirst(lc);
+
+		if (IsA(expr, PlaceHolderVar))
+			return NULL;
+	}
+
+	if (IS_SIMPLE_REL(rel))
+	{
+		RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+
+		/*
+		 * rtekind != RTE_RELATION case is not supported yet.
+		 */
+		if (rte->rtekind != RTE_RELATION)
+			return NULL;
+	}
+
+	/* Caller should only pass base relations or joins. */
+	Assert(rel->reloptkind == RELOPT_BASEREL ||
+		   rel->reloptkind == RELOPT_JOINREL);
+
+	/*
+	 * If any outer join can set the attribute value to NULL, the Agg plan
+	 * would receive different input at the base rel level.
+	 *
+	 * XXX For RELOPT_JOINREL, do not return if all the joins that can set any
+	 * entry of the grouped target (do we need to postpone this check until
+	 * the grouped target is available, and init_grouping_targets take care?)
+	 * of this rel to NULL are provably below rel. (It's ok if rel is one of
+	 * these joins.)
+	 */
+	if (bms_overlap(rel->relids, root->nullable_baserels))
+		return NULL;
+
+	/*
+	 * Use equivalence classes to generate additional grouping expressions for
+	 * the current rel. Without these we might not be able to apply
+	 * aggregation to the relation result set.
+	 *
+	 * It's important that create_grouping_expr_grouped_var_infos has
+	 * processed the explicit grouping columns by now. If the grouping clause
+	 * contains multiple expressions belonging to the same EC, the original
+	 * (i.e. not derived) one should be preferred when we build grouping
+	 * target for a relation. Otherwise we have a problem when trying to match
+	 * target entries to grouping clauses during plan creation, see
+	 * get_grouping_expression().
+	 */
+	gvis = list_copy(root->grouped_var_list);
+	foreach(lc, root->grouped_var_list)
+	{
+		GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, lc);
+		int			relid = -1;
+
+		/* Only interested in grouping expressions. */
+		if (IsA(gvi->gvexpr, Aggref))
+			continue;
+
+		while ((relid = bms_next_member(rel->relids, relid)) >= 0)
+		{
+			GroupedVarInfo *gvi_trans;
+
+			gvi_trans = translate_expression_to_rels(root, gvi, relid);
+			if (gvi_trans != NULL)
+				gvis = lappend(gvis, gvi_trans);
+		}
+	}
+
+	/*
+	 * Check if some aggregates or grouping expressions can be evaluated in
+	 * this relation's target, and collect all vars referenced by these
+	 * aggregates / grouping expressions;
+	 */
+	found_higher_agg = false;
+	foreach(lc, gvis)
+	{
+		GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, lc);
+
+		/*
+		 * The subset includes gv_eval_at uninitialized, which includes
+		 * Aggref.aggstar.
+		 */
+		if (bms_is_subset(gvi->gv_eval_at, rel->relids))
+		{
+			/*
+			 * init_grouping_targets will handle plain Var grouping
+			 * expressions because it needs to look them up in
+			 * grouped_var_list anyway.
+			 *
+			 * XXX A plain Var could actually be handled w/o GroupedVar, but
+			 * thus init_grouping_targets would have to spend extra effort
+			 * looking for the EC-related vars, instead of relying on
+			 * create_grouping_expr_grouped_var_infos. (Processing of
+			 * particular expression would look different, so we could hardly
+			 * reuse the same piece of code.)
+			 */
+			if (IsA(gvi->gvexpr, Var))
+				continue;
+
+			/*
+			 * The derived grouping expressions should not be referenced by
+			 * the query targetlist, so do not add them if we're at the top of
+			 * the join tree.
+			 */
+			if (gvi->derived && bms_equal(rel->relids, root->all_baserels))
+				continue;
+
+			/*
+			 * Accept the aggregate / grouping expression.
+			 *
+			 * (GroupedVarInfo is more convenient for the next processing than
+			 * Aggref, see add_aggregates_to_grouped_target.)
+			 */
+			if (IsA(gvi->gvexpr, Aggref))
+				aggregates = lappend(aggregates, gvi);
+			else
+				grp_exprs = lappend(grp_exprs, gvi);
+		}
+		else if (bms_overlap(gvi->gv_eval_at, rel->relids) &&
+				 IsA(gvi->gvexpr, Aggref))
+		{
+			/*
+			 * Remember that there is at least one aggregate expression that
+			 * needs more than this rel.
+			 */
+			found_higher_agg = true;
+		}
+	}
+
+	/*
+	 * Grouping makes little sense w/o aggregate function and w/o grouping
+	 * expressions.
+	 */
+	if (aggregates == NIL)
+	{
+		list_free(gvis);
+		return NULL;
+	}
+
+	/*
+	 * Give up if some other aggregate(s) need multiple relations including
+	 * the current one. The problem is that grouping of the current relation
+	 * could make some input variables unavailable for the "higher aggregate",
+	 * and it'd also decrease the number of input rows the "higher aggregate"
+	 * receives.
+	 *
+	 * In contrast, grp_exprs is only supposed to contain generic grouping
+	 * expression, so it can be NIL so far. If all the grouping keys are just
+	 * plain Vars, init_grouping_targets will take care of them.
+	 */
+	if (found_higher_agg)
+	{
+		list_free(gvis);
+		return NULL;
+	}
+
+	/*
+	 * Create target for grouped paths as well as one for the input paths of
+	 * the aggregation paths.
+	 */
+	target_partial = create_empty_pathtarget();
+	agg_input = create_empty_pathtarget();
+	init_grouping_targets(root, rel, target_partial, agg_input, gvis,
+						  &grp_exprs_extra);
+	list_free(gvis);
+
+	/*
+	 * Add (non-Var) grouping expressions (in the form of GroupedVar) to
+	 * target_agg.
+	 *
+	 * Follow the convention that the grouping expressions should precede
+	 * aggregates.
+	 */
+	add_grouped_vars_to_target(root, target_partial, grp_exprs);
+
+	/*
+	 * Aggregation push-down makes no sense w/o grouping expressions.
+	 */
+	if (list_length(target_partial->exprs) == 0)
+		return NULL;
+
+	/*
+	 * If the aggregation target should have extra grouping expressions, add
+	 * them now. This step includes assignment of tleSortGroupRef's which we
+	 * can generate now (the "ordinary" grouping expressions are present in
+	 * the target by now).
+	 */
+	if (list_length(grp_exprs_extra) > 0)
+	{
+		Index		sortgroupref;
+
+		/*
+		 * Always start at root->max_sortgroupref. The extra grouping
+		 * expressions aren't used during the final aggregation, so the
+		 * sortgroupref values don't need to be unique across the query. Thus
+		 * we don't have to increase root->max_sortgroupref, which makes
+		 * recognition of the extra grouping expressions pretty easy.
+		 */
+		sortgroupref = root->max_sortgroupref;
+
+		/*
+		 * Generate the SortGroupClause's and add the expressions to the
+		 * target.
+		 */
+		foreach(lc, grp_exprs_extra)
+		{
+			Var		   *var = lfirst_node(Var, lc);
+			SortGroupClause *cl = makeNode(SortGroupClause);
+			int			i = 0;
+			ListCell   *lc2;
+
+			/*
+			 * TODO Verify that these fields are sufficient for this special
+			 * SortGroupClause.
+			 */
+			cl->tleSortGroupRef = ++sortgroupref;
+			get_sort_group_operators(var->vartype,
+									 false, true, false,
+									 NULL, &cl->eqop, NULL,
+									 &cl->hashable);
+			sortgroupclauses = lappend(sortgroupclauses, cl);
+			add_column_to_pathtarget(target_partial, (Expr *) var,
+									 cl->tleSortGroupRef);
+
+			/*
+			 * The aggregation input target must emit this var too. It can
+			 * already be there, so avoid adding it again.
+			 */
+			foreach(lc2, agg_input->exprs)
+			{
+				Expr	   *expr = (Expr *) lfirst(lc2);
+
+				if (equal(expr, var))
+				{
+					/*
+					 * The fact that the var is in agg_input does not imply
+					 * that it has sortgroupref set. For example, the reason
+					 * that it's there can be that a generic grouping
+					 * expression references it, so grouping by the var alone
+					 * hasn't been considered so far.
+					 */
+					if (agg_input->sortgrouprefs == NULL)
+					{
+						agg_input->sortgrouprefs = (Index *)
+							palloc0(list_length(agg_input->exprs) *
+									sizeof(Index));
+					}
+					if (agg_input->sortgrouprefs[i] == 0)
+						agg_input->sortgrouprefs[i] = cl->tleSortGroupRef;
+
+					break;
+				}
+
+				i++;
+			}
+			if (lc2 != NULL)
+				continue;
+
+			/*
+			 * Add the var if it's not in the target yet.
+			 */
+			add_column_to_pathtarget(agg_input, (Expr *) var,
+									 cl->tleSortGroupRef);
+		}
+	}
+
+	/*
+	 * Add aggregates (in the form of GroupedVar) to the grouping target.
+	 */
+	add_grouped_vars_to_target(root, target_partial, aggregates);
+
+	/*
+	 * Make sure that the paths generating input data for partial aggregation
+	 * include non-Var grouping expressions.
+	 */
+	foreach(lc, grp_exprs)
+	{
+		GroupedVarInfo *gvi;
+
+		gvi = lfirst_node(GroupedVarInfo, lc);
+		add_column_to_pathtarget(agg_input, gvi->gvexpr, gvi->sortgroupref);
+	}
+
+	/*
+	 * Since neither target nor agg_input is supposed to be identical to the
+	 * source reltarget, compute the width and cost again.
+	 */
+	set_pathtarget_cost_width(root, target_partial);
+	set_pathtarget_cost_width(root, agg_input);
+
+	/*
+	 * Setup a target for 1-stage aggregation (REL_AGG_KIND_SIMPLE).
+	 */
+	target_simple = copy_pathtarget(target_partial);
+	exprs_tmp = NIL;
+	foreach(lc, target_simple->exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+
+		/*
+		 * The difference from target_partial is that the contained
+		 * GroupedVars do not have agg_partial set.
+		 */
+		if (IsA(expr, GroupedVar))
+		{
+			GroupedVar *gvar_new = makeNode(GroupedVar);
+
+			memcpy(gvar_new, expr, sizeof(GroupedVar));
+			gvar_new->agg_partial = NULL;
+			expr = (Expr *) gvar_new;
+		}
+		exprs_tmp = lappend(exprs_tmp, expr);
+	}
+	target_simple->exprs = exprs_tmp;
+	set_pathtarget_cost_width(root, target_simple);
+
+	result = makeNode(RelAggInfo);
+	result->target_partial = target_partial;
+	result->target_simple = target_simple;
+	result->input = agg_input;
+
+	/*
+	 * Build a list of grouping expressions and a list of the corresponding
+	 * SortGroupClauses.
+	 */
+	i = 0;
+	foreach(lc, target_partial->exprs)
+	{
+		Index		sortgroupref = 0;
+		SortGroupClause *cl;
+		Expr	   *texpr;
+
+		texpr = (Expr *) lfirst(lc);
+
+		if (IsA(texpr, GroupedVar) &&
+			IsA(((GroupedVar *) texpr)->gvexpr, Aggref))
+		{
+			/*
+			 * texpr should represent the first aggregate in the targetlist.
+			 */
+			break;
+		}
+
+		/*
+		 * Find the clause by sortgroupref.
+		 */
+		sortgroupref = target_partial->sortgrouprefs[i++];
+
+		/*
+		 * Besides being an aggregate, the target expression should have no
+		 * other reason than being a column of a relation functionally
+		 * dependent on the GROUP BY clause. So it's not actually a grouping
+		 * column.
+		 */
+		if (sortgroupref == 0)
+			continue;
+
+		cl = get_sortgroupref_clause_noerr(sortgroupref,
+										   root->parse->groupClause);
+
+		/*
+		 * If query does not have this clause, it must be target-specific.
+		 */
+		if (cl == NULL)
+			cl = get_sortgroupref_clause(sortgroupref, sortgroupclauses);
+
+		result->group_clauses = list_append_unique(result->group_clauses,
+												   cl);
+
+		/*
+		 * Add only unique clauses because of joins (both sides of a join can
+		 * point at the same grouping clause). XXX Is it worth adding a bool
+		 * argument indicating that we're dealing with join right now?
+		 */
+		result->group_exprs = list_append_unique(result->group_exprs,
+												 texpr);
+	}
+
+	/* Finally collect the aggregates. */
+	while (lc != NULL)
+	{
+		GroupedVar *gvar = castNode(GroupedVar, lfirst(lc));
+
+		Assert(IsA(gvar->gvexpr, Aggref));
+		result->agg_exprs_simple = lappend(result->agg_exprs_simple,
+										   gvar->gvexpr);
+
+		Assert(gvar->agg_partial != NULL);
+		result->agg_exprs_partial = lappend(result->agg_exprs_partial,
+											gvar->agg_partial);
+		lc = lnext(lc);
+	}
+
+	return result;
+}
+
+/*
+ * Initialize target for grouped paths (target) as well as a target for paths
+ * that generate input for partial aggregation (agg_input).
+ *
+ * gvis is a list of GroupedVarInfo's possibly useful for rel.
+ *
+ * The *group_exprs_extra_p list may receive additional grouping expressions
+ * that the query does not have. These can make the aggregation of base
+ * relation / join less efficient, but can allow for join of the grouped
+ * relation that wouldn't be possible otherwise.
+ */
+static void
+init_grouping_targets(PlannerInfo *root, RelOptInfo *rel,
+					  PathTarget *target, PathTarget *agg_input,
+					  List *gvis, List **group_exprs_extra_p)
+{
+	ListCell   *lc;
+	List	   *vars_unresolved = NIL;
+
+	foreach(lc, rel->reltarget->exprs)
+	{
+		Var		   *tvar;
+		GroupedVar *gvar;
+
+		/*
+		 * Given that PlaceHolderVar currently prevents us from doing
+		 * aggregation push-down, the source target cannot contain anything
+		 * more complex than a Var. (As for generic grouping expressions,
+		 * add_grouped_vars_to_target will retrieve them from the query
+		 * targetlist and add them to "target" outside this function.)
+		 */
+		tvar = lfirst_node(Var, lc);
+
+		gvar = get_grouping_expression(gvis, (Expr *) tvar);
+		if (gvar != NULL)
+		{
+			/*
+			 * It's o.k. to use the target expression for grouping.
+			 *
+			 * The actual Var is added to the target. If we used the
+			 * containing GroupedVar, references from various clauses (e.g.
+			 * join quals) wouldn't work.
+			 */
+			add_column_to_pathtarget(target, gvar->gvexpr,
+									 gvar->sortgroupref);
+
+			/*
+			 * As for agg_input, add the original expression but set
+			 * sortgroupref in addition.
+			 */
+			add_column_to_pathtarget(agg_input, gvar->gvexpr,
+									 gvar->sortgroupref);
+
+			/* Process the next expression. */
+			continue;
+		}
+
+		/*
+		 * Further investigation involves dependency check, for which we need
+		 * to have all the plain-var grouping expressions gathered. So far
+		 * only store the var in a list.
+		 */
+		vars_unresolved = lappend(vars_unresolved, tvar);
+	}
+
+	/*
+	 * Check for other possible reasons for the var to be in the plain target.
+	 */
+	foreach(lc, vars_unresolved)
+	{
+		Var		   *var;
+		RangeTblEntry *rte;
+		List	   *deps = NIL;
+		Relids		relids_subtract;
+		int			ndx;
+		RelOptInfo *baserel;
+
+		var = lfirst_node(Var, lc);
+		rte = root->simple_rte_array[var->varno];
+
+		/*
+		 * Dependent var is almost the same as one that has sortgroupref.
+		 */
+		if (check_functional_grouping(rte->relid, var->varno,
+									  var->varlevelsup,
+									  target->exprs, &deps))
+		{
+
+			Index		sortgroupref = 0;
+
+			add_column_to_pathtarget(target, (Expr *) var, sortgroupref);
+
+			/*
+			 * The var shouldn't be actually used as a grouping key (instead,
+			 * the one this depends on will be), so sortgroupref should not be
+			 * important. But once we have it ...
+			 */
+			add_column_to_pathtarget(agg_input, (Expr *) var, sortgroupref);
+
+			/*
+			 * The var may or may not be present in generic grouping
+			 * expression(s) or aggregate arguments, but we already have it in
+			 * the targets, so don't care.
+			 */
+			continue;
+		}
+
+		/*
+		 * Isn't the expression needed by joins above the current rel?
+		 *
+		 * The relids we're not interested in do include 0, which is the
+		 * top-level targetlist. The only reason for relids to contain 0
+		 * should be that arg_var is referenced either by aggregate or by
+		 * grouping expression, but right now we're interested in the *other*
+		 * reasons. (As soon as GroupedVars are installed, the top level
+		 * aggregates / grouping expressions no longer need direct reference
+		 * to arg_var anyway.)
+		 */
+		relids_subtract = bms_copy(rel->relids);
+		bms_add_member(relids_subtract, 0);
+
+		baserel = find_base_rel(root, var->varno);
+		ndx = var->varattno - baserel->min_attr;
+		if (bms_nonempty_difference(baserel->attr_needed[ndx],
+									relids_subtract))
+		{
+			/*
+			 * The variable is needed by upper join. This includes one that is
+			 * referenced by a generic grouping expression but couldn't be
+			 * recognized as grouping expression on its own at the top of the
+			 * loop.
+			 *
+			 * The only way to bring this var to the aggregation output is to
+			 * add it to the grouping expressions too.
+			 *
+			 * Since root->parse->groupClause is not supposed to contain this
+			 * expression, we need to construct special SortGroupClause. Its
+			 * tleSortGroupRef needs to be unique within "target", so postpone
+			 * creation of the SortGroupRefs until we're done with the
+			 * iteration of rel->reltarget->exprs.
+			 */
+			*group_exprs_extra_p = lappend(*group_exprs_extra_p, var);
+		}
+		else
+		{
+			/*
+			 * As long as the query is semantically correct, arriving here
+			 * means that the var is referenced either by aggregate argument
+			 * or by generic grouping expression. The per-relation aggregation
+			 * target should not contain it, as it only provides input for the
+			 * final aggregation.
+			 */
+		}
+
+		/*
+		 * The var is not suitable for grouping, but agg_input ought to stay
+		 * complete.
+		 */
+		add_column_to_pathtarget(agg_input, (Expr *) var, 0);
+	}
+}
+
+
+/*
+ * Translate RelAggInfo of parent relation so it matches given child relation.
+ */
+RelAggInfo *
+translate_rel_agg_info(PlannerInfo *root, RelAggInfo *parent,
+					   AppendRelInfo **appinfos, int nappinfos)
+{
+	RelAggInfo *result;
+
+	result = makeNode(RelAggInfo);
+
+	result->target_simple = copy_pathtarget(parent->target_simple);
+	result->target_simple->exprs = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) result->target_simple->exprs,
+							   nappinfos, appinfos);
+	result->target_partial = copy_pathtarget(parent->target_partial);
+	result->target_partial->exprs = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) result->target_partial->exprs,
+							   nappinfos, appinfos);
+
+	result->input = copy_pathtarget(parent->input);
+	result->input->exprs = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) result->input->exprs,
+							   nappinfos, appinfos);
+
+	result->group_clauses = parent->group_clauses;
+
+	result->group_exprs = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) parent->group_exprs,
+							   nappinfos, appinfos);
+
+	result->agg_exprs_simple = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) parent->agg_exprs_simple,
+							   nappinfos, appinfos);
+	result->agg_exprs_partial = (List *)
+		adjust_appendrel_attrs(root,
+							   (Node *) parent->agg_exprs_partial,
+							   nappinfos, appinfos);
+	return result;
+}
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 5500f33e63..b09fddeb32 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -426,7 +426,6 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 	return result;
 }
 
-
 /*****************************************************************************
  *		Functions to extract data from a list of SortGroupClauses
  *
@@ -801,6 +800,133 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
 }
 
 /*
+ * Replace each GroupedVar in the source targetlist with the original
+ * expression --- either Aggref or a non-Var grouping expression.
+ *
+ * Even if the query targetlist has the Aggref wrapped in a generic
+ * expression, any subplan should emit the corresponding GroupedVar
+ * alone. (Aggregate finalization is needed before the aggregate result can be
+ * used for any purposes and that happens at the top level of the query.)
+ * Therefore we do not have to recurse into the target expressions here.
+ */
+List *
+replace_grouped_vars_with_aggrefs(PlannerInfo *root, List *src)
+{
+	List	   *result = NIL;
+	ListCell   *l;
+
+	foreach(l, src)
+	{
+		TargetEntry *te,
+				   *te_new;
+		Expr	   *expr_new = NULL;
+
+		te = lfirst_node(TargetEntry, l);
+
+		if (IsA(te->expr, GroupedVar))
+		{
+			GroupedVar *gvar;
+
+			gvar = castNode(GroupedVar, te->expr);
+			if (IsA(gvar->gvexpr, Aggref))
+			{
+				if (gvar->agg_partial)
+				{
+					/*
+					 * Partial aggregate should appear in the targetlist so
+					 * that it looks as if convert_combining_aggrefs arranged
+					 * it.
+					 */
+					expr_new = (Expr *) gvar->agg_partial;
+				}
+				else
+				{
+					/*
+					 * Restore the original aggregate. This is typical for the
+					 * REL_AGG_KIND_SIMPLE kind of aggregate push-down.
+					 */
+					Assert(IsA(gvar->gvexpr, Aggref));
+
+					expr_new = (Expr *) gvar->gvexpr;
+				}
+			}
+			else
+				expr_new = gvar->gvexpr;
+		}
+
+		if (expr_new != NULL)
+		{
+			te_new = flatCopyTargetEntry(te);
+			te_new->expr = (Expr *) expr_new;
+		}
+		else
+			te_new = te;
+		result = lappend(result, te_new);
+	}
+
+	return result;
+}
+
+/*
+ * For each aggregate add GroupedVar to the grouped target.
+ *
+ * Caller passes the aggregates in the form of GroupedVarInfos so that we
+ * don't have to look for gvid.
+ */
+void
+add_grouped_vars_to_target(PlannerInfo *root, PathTarget *target,
+						   List *expressions)
+{
+	ListCell   *lc;
+
+	/* Create the vars and add them to the target. */
+	foreach(lc, expressions)
+	{
+		GroupedVarInfo *gvi;
+		GroupedVar *gvar;
+
+		gvi = lfirst_node(GroupedVarInfo, lc);
+		gvar = makeNode(GroupedVar);
+		gvar->gvid = gvi->gvid;
+		gvar->gvexpr = gvi->gvexpr;
+		gvar->agg_partial = gvi->agg_partial;
+		add_column_to_pathtarget(target, (Expr *) gvar, gvi->sortgroupref);
+	}
+}
+
+/*
+ * Return GroupedVar containing the passed-in expression if one exists, or
+ * NULL if the expression cannot be used as grouping key.
+ */
+GroupedVar *
+get_grouping_expression(List *gvis, Expr *expr)
+{
+	ListCell   *lc;
+
+	foreach(lc, gvis)
+	{
+		GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, lc);
+
+		if (IsA(gvi->gvexpr, Aggref))
+			continue;
+
+		if (equal(gvi->gvexpr, expr))
+		{
+			GroupedVar *result = makeNode(GroupedVar);
+
+			Assert(gvi->sortgroupref > 0);
+			result->gvexpr = gvi->gvexpr;
+			result->gvid = gvi->gvid;
+			result->sortgroupref = gvi->sortgroupref;
+			return result;
+		}
+	}
+
+	/* The expression cannot be used as grouping key. */
+	return NULL;
+}
+
+/*
  * split_pathtarget_at_srfs
  *		Split given PathTarget into multiple levels to position SRFs safely
  *
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index b16b1e4656..459dc3087c 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -840,3 +840,25 @@ alias_relid_set(PlannerInfo *root, Relids relids)
 	}
 	return result;
 }
+
+/*
+ * Return GroupedVarInfo for given GroupedVar.
+ *
+ * XXX Consider better location of this routine.
+ */
+GroupedVarInfo *
+find_grouped_var_info(PlannerInfo *root, GroupedVar *gvar)
+{
+	ListCell   *l;
+
+	foreach(l, root->grouped_var_list)
+	{
+		GroupedVarInfo *gvi = lfirst_node(GroupedVarInfo, l);
+
+		if (gvi->gvid == gvar->gvid)
+			return gvi;
+	}
+
+	elog(ERROR, "GroupedVarInfo not found");
+	return NULL;				/* keep compiler quiet */
+}
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index abe1dbc521..3671f8dda3 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -104,6 +104,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 	Oid			vatype;
 	FuncDetailCode fdresult;
 	char		aggkind = 0;
+	Oid			aggcombinefn = InvalidOid;
 	ParseCallbackState pcbstate;
 
 	/*
@@ -360,6 +361,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 			elog(ERROR, "cache lookup failed for aggregate %u", funcid);
 		classForm = (Form_pg_aggregate) GETSTRUCT(tup);
 		aggkind = classForm->aggkind;
+		aggcombinefn = classForm->aggcombinefn;
 		catDirectArgs = classForm->aggnumdirectargs;
 		ReleaseSysCache(tup);
 
@@ -740,6 +742,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 		aggref->aggstar = agg_star;
 		aggref->aggvariadic = func_variadic;
 		aggref->aggkind = aggkind;
+		aggref->aggcombinefn = aggcombinefn;
 		/* agglevelsup will be set by transformAggregateCall */
 		aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
 		aggref->location = location;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 065238b0fe..c17ef5edba 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -7723,6 +7723,23 @@ get_rule_expr(Node *node, deparse_context *context,
 			get_agg_expr((Aggref *) node, context, (Aggref *) node);
 			break;
 
+		case T_GroupedVar:
+			{
+				GroupedVar *gvar = castNode(GroupedVar, node);
+				Expr	   *expr = gvar->gvexpr;
+
+				if (IsA(expr, Aggref))
+					get_agg_expr(gvar->agg_partial, context, (Aggref *) gvar->gvexpr);
+				else if (IsA(expr, Var))
+					(void) get_variable((Var *) expr, 0, false, context);
+				else
+				{
+					Assert(IsA(gvar->gvexpr, OpExpr));
+					get_oper_expr((OpExpr *) expr, context);
+				}
+				break;
+			}
+
 		case T_GroupingFunc:
 			{
 				GroupingFunc *gexpr = (GroupingFunc *) node;
@@ -9208,10 +9225,18 @@ get_agg_combine_expr(Node *node, deparse_context *context, void *private)
 	Aggref	   *aggref;
 	Aggref	   *original_aggref = private;
 
-	if (!IsA(node, Aggref))
+	if (IsA(node, Aggref))
+		aggref = (Aggref *) node;
+	else if (IsA(node, GroupedVar))
+	{
+		GroupedVar *gvar = castNode(GroupedVar, node);
+
+		aggref = gvar->agg_partial;
+		original_aggref = castNode(Aggref, gvar->gvexpr);
+	}
+	else
 		elog(ERROR, "combining Aggref does not point to an Aggref");
 
-	aggref = (Aggref *) node;
 	get_agg_expr(aggref, context, original_aggref);
 }
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 4b08cdb721..eb02d1801c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -114,6 +114,7 @@
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -3884,6 +3885,39 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
 	ReleaseVariableStats(vardata);
 }
 
+/*
+ * estimate_hashagg_tablesize
+ *	  estimate the number of bytes that a hash aggregate hashtable will
+ *	  require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
+ */
+Size
+estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
+						   double dNumGroups)
+{
+	Size		hashentrysize;
+
+	/* Estimate per-hash-entry space at tuple width... */
+	hashentrysize = MAXALIGN(path->pathtarget->width) +
+		MAXALIGN(SizeofMinimalTupleHeader);
+
+	/* plus space for pass-by-ref transition values... */
+	hashentrysize += agg_costs->transitionSpace;
+	/* plus the per-hash-entry overhead */
+	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+
+	/*
+	 * Note that this disregards the effect of fill-factor and growth policy
+	 * of the hash-table. That's probably ok, given that the default
+	 * fill-factor is relatively high. It'd be hard to meaningfully factor in
+	 * "double-in-size" growth policies here.
+	 */
+	return hashentrysize * dNumGroups;
+}
 
 /*-------------------------------------------------------------------------
  *
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b05fb209bb..bc335be32d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -944,6 +944,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
+		{"enable_agg_pushdown", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables aggregation push-down."),
+			NULL
+		},
+		&enable_agg_pushdown,
+		false,
+		NULL, NULL, NULL
+	},
+	{
 		{"enable_parallel_append", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of parallel append plans."),
 			NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 43f1552241..f76fa9d532 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -222,6 +222,7 @@ typedef enum NodeTag
 	T_IndexOptInfo,
 	T_ForeignKeyOptInfo,
 	T_ParamPathInfo,
+	T_RelAggInfo,
 	T_Path,
 	T_IndexPath,
 	T_BitmapHeapPath,
@@ -262,9 +263,11 @@ typedef enum NodeTag
 	T_PathTarget,
 	T_RestrictInfo,
 	T_PlaceHolderVar,
+	T_GroupedVar,
 	T_SpecialJoinInfo,
 	T_AppendRelInfo,
 	T_PlaceHolderInfo,
+	T_GroupedVarInfo,
 	T_MinMaxAggInfo,
 	T_PlannerParamItem,
 	T_RollupData,
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 1b4b0d75af..6af31f2722 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -296,6 +296,7 @@ typedef struct Aggref
 	Oid			aggcollid;		/* OID of collation of result */
 	Oid			inputcollid;	/* OID of collation that function should use */
 	Oid			aggtranstype;	/* type Oid of aggregate's transition value */
+	Oid			aggcombinefn;	/* combine function (see pg_aggregate.h) */
 	List	   *aggargtypes;	/* type Oids of direct and aggregated args */
 	List	   *aggdirectargs;	/* direct arguments, if an ordered-set agg */
 	List	   *args;			/* aggregated arguments and sort expressions */
@@ -306,6 +307,7 @@ typedef struct Aggref
 	bool		aggvariadic;	/* true if variadic arguments have been
 								 * combined into an array last argument */
 	char		aggkind;		/* aggregate kind (see pg_aggregate.h) */
+
 	Index		agglevelsup;	/* > 0 if agg belongs to outer query */
 	AggSplit	aggsplit;		/* expected agg-splitting mode of parent Agg */
 	int			location;		/* token location, or -1 if unknown */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 7cae3fcfb5..d3a7a97672 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -193,7 +193,8 @@ typedef struct PlannerInfo
 	 * unreferenced view RTE; or if the RelOptInfo hasn't been made yet.
 	 */
 	struct RelOptInfo **simple_rel_array;	/* All 1-rel RelOptInfos */
-	int			simple_rel_array_size;	/* allocated size of array */
+
+	int			simple_rel_array_size;	/* allocated size of the arrays above */
 
 	/*
 	 * simple_rte_array is the same length as simple_rel_array and holds
@@ -247,6 +248,7 @@ typedef struct PlannerInfo
 	 * join_rel_level is NULL if not in use.
 	 */
 	List	  **join_rel_level; /* lists of join-relation RelOptInfos */
+
 	int			join_cur_level; /* index of list being extended */
 
 	List	   *init_plans;		/* init SubPlans for query */
@@ -279,6 +281,8 @@ typedef struct PlannerInfo
 
 	List	   *placeholder_list;	/* list of PlaceHolderInfos */
 
+	List	   *grouped_var_list;	/* List of GroupedVarInfos. */
+
 	List	   *fkey_list;		/* list of ForeignKeyOptInfos */
 
 	List	   *query_pathkeys; /* desired pathkeys for query_planner() */
@@ -305,6 +309,12 @@ typedef struct PlannerInfo
 	 */
 	List	   *processed_tlist;
 
+	/*
+	 * The maximum ressortgroupref among target entries in processed_tlist.
+	 * Useful when adding extra grouping expressions for partial aggregation.
+	 */
+	int			max_sortgroupref;
+
 	/* Fields filled during create_plan() for use in setrefs.c */
 	AttrNumber *grouping_map;	/* for GroupingFunc fixup */
 	List	   *minmax_aggs;	/* List of MinMaxAggInfos */
@@ -387,6 +397,36 @@ typedef struct PartitionSchemeData
 
 typedef struct PartitionSchemeData *PartitionScheme;
 
+/*
+ * Grouped paths created at relation level are added to the relations stored
+ * in this structure.
+ */
+typedef struct RelOptGrouped
+{
+	/*
+	 * Paths belonging to this relation need additional processing by
+	 * create_grouping_paths() and subroutines.
+	 *
+	 * This field should always be set.
+	 */
+	struct RelOptInfo *needs_final_agg;
+
+	/*
+	 * Paths belonging to this relation do not need create_grouping_paths().
+	 * They are ready for the next upper rel processing, e.g.
+	 * create_ordered_paths().
+	 *
+	 * This relation should not contain any partial paths. XXX Consider if
+	 * there are special cases where we can apply AGGSPLIT_SIMPLE aggregates
+	 * to partitions and process the result using parallel Append w/o getting
+	 * duplicate groups.
+	 *
+	 * RelOptGrouped may have this field NULL, e.g. for partitioned table
+	 * (because partitions can generate duplicate values of the grouping key).
+	 */
+	struct RelOptInfo *no_final_agg;
+} RelOptGrouped;
+
 /*----------
  * RelOptInfo
  *		Per-relation information for planning/optimization
@@ -467,6 +507,8 @@ typedef struct PartitionSchemeData *PartitionScheme;
  *		direct_lateral_relids - rels this rel has direct LATERAL references to
  *		lateral_relids - required outer rels for LATERAL, as a Relids set
  *			(includes both direct and indirect lateral references)
+ *		agg_info - RelAggInfo if the relation's paths can be aggregated, NULL
+ *		otherwise.
  *
  * If the relation is a base relation it will have these fields set:
  *
@@ -646,6 +688,16 @@ typedef struct RelOptInfo
 	Relids		direct_lateral_relids;	/* rels directly laterally referenced */
 	Relids		lateral_relids; /* minimum parameterization of rel */
 
+	/* Information needed to apply partial aggregation to this rel's paths. */
+	struct RelAggInfo *agg_info;
+
+	/*
+	 * If the relation can produce grouped paths, store them here.
+	 *
+	 * If "grouped" is valid then "agg_info" must be NULL and vice versa.
+	 */
+	struct RelOptGrouped *grouped;
+
 	/* information about a base rel (not set for join rels!) */
 	Index		relid;
 	Oid			reltablespace;	/* containing tablespace */
@@ -1051,6 +1103,79 @@ typedef struct ParamPathInfo
 
 
 /*
+ * What kind of aggregation should be applied to base relation or join?
+ */
+typedef enum
+{
+	REL_AGG_KIND_NONE,			/* No aggregation. */
+	REL_AGG_KIND_SIMPLE,		/* AGGSPLIT_SIMPLE */
+	REL_AGG_KIND_PARTIAL		/* AGGSPLIT_INITIAL_SERIAL */
+} RelAggKind;
+
+/*
+ * RelAggInfo
+ *
+ * RelOptInfo needs information contained here if its paths should be
+ * aggregated.
+ *
+ * "target_simple" or "target_partial" will be used as pathtarget for
+ * REL_AGG_KIND_SIMPLE and REL_AGG_KIND_PARTIAL aggregation respectively, if
+ * "explicit aggregation" is applied to base relation or join. The same target
+ * will also --- if the relation is a join --- be used to join a grouped
+ * path to a non-grouped one.
+ *
+ * These targets contain plain-Var grouping expressions, generic grouping
+ * expressions wrapped in GroupedVar structure, or Aggrefs which are also
+ * wrapped in GroupedVar. Once GroupedVar is evaluated, its value is passed to
+ * the upper paths w/o being evaluated again. If final aggregation appears to
+ * be necessary above the final join, the contained Aggrefs are supposed to
+ * provide the final aggregation plan with input values, i.e. the aggregate
+ * transient state.
+ *
+ * Note: There's a convention that GroupedVars that contain Aggref expressions
+ * are supposed to follow the other expressions of the target. Iterations of
+ * ->exprs may rely on this arrangement.
+ *
+ * "input" contains Vars used either as grouping expressions or aggregate
+ * arguments, plus those used in grouping expressions which are not plain Vars
+ * themselves. Paths providing the aggregation plan with input data should use
+ * this target.
+ *
+ * "group_clauses" and "group_exprs" are lists of SortGroupClause and the
+ * corresponding grouping expressions respectively.
+ *
+ * "agg_exprs_simple" and "agg_exprs_partial" are lists of Aggref nodes for
+ * the "simple" and partial aggregation respectively, to be evaluated by the
+ * relation.
+ *
+ * "rows" is the estimated number of result tuples produced by grouped
+ * paths.
+ */
+typedef struct RelAggInfo
+{
+	NodeTag		type;
+
+	PathTarget *target_simple;	/* Target for REL_AGG_KIND_SIMPLE. */
+	PathTarget *target_partial; /* Target for REL_AGG_KIND_PARTIAL. */
+
+	PathTarget *input;			/* pathtarget of paths that generate input for
+								 * aggregation paths. */
+
+	List	   *group_clauses;
+	List	   *group_exprs;
+
+	/*
+	 * TODO Consider removing these fields and creating the Aggref, partial or
+	 * simple, when needed, but avoid creating it multiple times (e.g. once
+	 * for hash grouping, other times for sorted grouping).
+	 */
+	List	   *agg_exprs_simple;	/* Expressions for REL_AGG_KIND_SIMPLE */
+	List	   *agg_exprs_partial;	/* Expressions for REL_AGG_KIND_PARTIAL */
+
+	double		rows;
+} RelAggInfo;
+
+/*
  * Type "Path" is used as-is for sequential-scan paths, as well as some other
  * simple plan types that we don't need any extra information in the path for.
  * For other path types it is the first component of a larger struct.
@@ -1526,12 +1651,16 @@ typedef struct HashPath
  * ProjectionPath node, which is marked dummy to indicate that we intend to
  * assign the work to the input plan node.  The estimated cost for the
  * ProjectionPath node will account for whether a Result will be used or not.
+ *
+ * force_result field tells that the Result node must be used for some reason
+ * even though the subpath could normally handle the projection.
  */
 typedef struct ProjectionPath
 {
 	Path		path;
 	Path	   *subpath;		/* path representing input source */
 	bool		dummypp;		/* true if no separate Result is needed */
+	bool		force_result;	/* Is Result node required? */
 } ProjectionPath;
 
 /*
@@ -2012,6 +2141,44 @@ typedef struct PlaceHolderVar
 	Index		phlevelsup;		/* > 0 if PHV belongs to outer query */
 } PlaceHolderVar;
 
+
+/*
+ * Similar to the concept of PlaceHolderVar, we treat aggregates and grouping
+ * columns as special variables if grouping is possible below the top-level
+ * join. Likewise, the variable is evaluated below the query targetlist (in
+ * particular, in the targetlist of AGGSPLIT_INITIAL_SERIAL aggregation node
+ * which has base relation or a join as the input) and bubbles up through the
+ * join tree until it reaches AGGSPLIT_FINAL_DESERIAL aggregation node.
+ *
+ * gvexpr is either Aggref or a generic (non-Var) grouping expression. (If a
+ * simple Var, we don't replace it with GroupedVar.)
+ *
+ * agg_partial also points to the corresponding field of GroupedVarInfo if
+ * gvexpr is Aggref.
+ */
+typedef struct GroupedVar
+{
+	Expr		xpr;
+	Expr	   *gvexpr;			/* the represented expression */
+
+	/*
+	 * TODO
+	 *
+	 * Do we need to cache the partial aggregate? (The simple aggregate should
+	 * be in gvexpr.) If not, make sure translation of the GroupedVar to child
+	 * rels works.
+	 *
+	 */
+	Aggref	   *agg_partial;	/* partial aggregate if gvexpr is an aggregate
+								 * and if it's used in a target of partial
+								 * aggregation. */
+
+	Index		sortgroupref;	/* SortGroupClause.tleSortGroupRef if gvexpr
+								 * is grouping expression. */
+	Index		gvid;			/* ID of the matching GroupedVarInfo */
+	int32		width;			/* Expression width. */
+} GroupedVar;
+
 /*
  * "Special join" info.
  *
@@ -2208,6 +2375,26 @@ typedef struct PlaceHolderInfo
 } PlaceHolderInfo;
 
 /*
+ * Likewise, GroupedVarInfo exists for each distinct GroupedVar.
+ */
+typedef struct GroupedVarInfo
+{
+	NodeTag		type;
+
+	Index		gvid;			/* GroupedVar.gvid */
+	Expr	   *gvexpr;			/* the represented expression. */
+	Aggref	   *agg_partial;	/* if gvexpr is aggregate, agg_partial is the
+								 * corresponding partial aggregate */
+	Index		sortgroupref;	/* If gvexpr is a grouping expression, this is
+								 * the tleSortGroupRef of the corresponding
+								 * SortGroupClause. */
+	Relids		gv_eval_at;		/* lowest level we can evaluate the expression
+								 * at or NULL if it can happen anywhere. */
+	bool		derived;		/* derived from another GroupedVarInfo using
+								 * equivalence classes? */
+} GroupedVarInfo;
+
+/*
  * This struct describes one potentially index-optimizable MIN/MAX aggregate
  * function.  MinMaxAggPath contains a list of these, and if we accept that
  * path, the list is stored into root->minmax_aggs for use during setrefs.c.
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index ed854fdd40..f9f3d14b0b 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -88,4 +88,6 @@ extern Query *inline_set_returning_function(PlannerInfo *root,
 extern List *expand_function_arguments(List *args, Oid result_type,
 						  HeapTuple func_tuple);
 
+extern GroupedVarInfo *translate_expression_to_rels(PlannerInfo *root,
+							 GroupedVarInfo *gvi, Index relid);
 #endif							/* CLAUSES_H */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 77ca7ff837..bb6ec0f4e1 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -72,6 +72,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate;
 extern PGDLLIMPORT bool enable_parallel_append;
 extern PGDLLIMPORT bool enable_parallel_hash;
 extern PGDLLIMPORT bool enable_partition_pruning;
+extern PGDLLIMPORT bool enable_agg_pushdown;
 extern PGDLLIMPORT int constraint_exclusion;
 
 extern double clamp_row_est(double nrows);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 4ba358e72d..b2f51fa119 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -68,9 +68,11 @@ extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
 				   List *subpaths, List *partial_subpaths,
 				   Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
-				   List *partitioned_rels, double rows);
+				   List *partitioned_rels, double rows,
+				   RelAggKind agg_kind);
 extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
 						 RelOptInfo *rel,
+						 PathTarget *target,
 						 List *subpaths,
 						 List *pathkeys,
 						 Relids required_outer,
@@ -123,6 +125,7 @@ extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_pat
 
 extern NestPath *create_nestloop_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
+					 PathTarget *target,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
 					 JoinPathExtraData *extra,
@@ -134,6 +137,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
 
 extern MergePath *create_mergejoin_path(PlannerInfo *root,
 					  RelOptInfo *joinrel,
+					  PathTarget *target,
 					  JoinType jointype,
 					  JoinCostWorkspace *workspace,
 					  JoinPathExtraData *extra,
@@ -148,6 +152,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
 
 extern HashPath *create_hashjoin_path(PlannerInfo *root,
 					 RelOptInfo *joinrel,
+					 PathTarget *target,
 					 JoinType jointype,
 					 JoinCostWorkspace *workspace,
 					 JoinPathExtraData *extra,
@@ -196,6 +201,14 @@ extern AggPath *create_agg_path(PlannerInfo *root,
 				List *qual,
 				const AggClauseCosts *aggcosts,
 				double numGroups);
+extern AggPath *create_agg_sorted_path(PlannerInfo *root,
+					   Path *subpath,
+					   bool check_pathkeys,
+					   double input_rows,
+					   RelAggKind agg_kind);
+extern AggPath *create_agg_hashed_path(PlannerInfo *root,
+					   Path *subpath,
+					   double input_rows, RelAggKind agg_kind);
 extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *subpath,
@@ -253,7 +266,8 @@ extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel,
 
 extern Path *reparameterize_path(PlannerInfo *root, Path *path,
 					Relids required_outer,
-					double loop_count);
+					double loop_count,
+					RelAggKind agg_kind);
 extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
 							 RelOptInfo *child_rel);
 
@@ -271,7 +285,8 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
-			   List **restrictlist_ptr);
+			   List **restrictlist_ptr,
+			   bool grouped);
 extern Relids min_join_parameterization(PlannerInfo *root,
 						  Relids joinrelids,
 						  RelOptInfo *outer_rel,
@@ -297,6 +312,11 @@ extern ParamPathInfo *find_param_path_info(RelOptInfo *rel,
 extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
 					 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 					 RelOptInfo *parent_joinrel, List *restrictlist,
-					 SpecialJoinInfo *sjinfo, JoinType jointype);
-
+					 SpecialJoinInfo *sjinfo, JoinType jointype,
+					 bool grouped);
+extern RelAggInfo *create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel);
+extern RelAggInfo *translate_rel_agg_info(PlannerInfo *root,
+					   RelAggInfo *agg_info,
+					   AppendRelInfo **appinfos,
+					   int nappinfos);
 #endif							/* PATHNODE_H */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..760673d591 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -21,6 +21,7 @@
  * allpaths.c
  */
 extern PGDLLIMPORT bool enable_geqo;
+extern PGDLLIMPORT bool enable_agg_pushdown;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
@@ -50,17 +51,23 @@ extern PGDLLIMPORT join_search_hook_type join_search_hook;
 
 extern RelOptInfo *make_one_rel(PlannerInfo *root, List *joinlist);
 extern void set_dummy_rel_pathlist(RelOptInfo *rel);
-extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
+extern RelOptInfo *standard_join_search(PlannerInfo *root,
+					 int levels_needed,
 					 List *initial_rels);
 
 extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
 					  bool override_rows);
+
+extern bool create_grouped_path(PlannerInfo *root, RelOptInfo *rel,
+					Path *subpath, bool precheck,
+					bool partial, AggStrategy aggstrategy, RelAggKind agg_kind);
 extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
 						double index_pages, int max_workers);
 extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
 							Path *bitmapqual);
 extern void generate_partitionwise_join_paths(PlannerInfo *root,
-								  RelOptInfo *rel);
+								  RelOptInfo *rel,
+								  RelAggKind agg_kind);
 
 #ifdef OPTIMIZER_DEBUG
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
@@ -70,7 +77,8 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
  * indxpath.c
  *	  routines to generate index paths
  */
-extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
+extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel,
+				   RelAggKind agg_kind);
 extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 							  List *restrictlist,
 							  List *exprlist, List *oprlist);
@@ -92,7 +100,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause,
  * tidpath.h
  *	  routines to generate tid paths
  */
-extern void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel);
+extern void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel,
+					 RelAggKind agg_kind);
 
 /*
  * joinpath.c
@@ -101,7 +110,8 @@ extern void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel);
 extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 JoinType jointype, SpecialJoinInfo *sjinfo,
-					 List *restrictlist);
+					 List *restrictlist,
+					 RelAggKind agg_kind, bool do_aggregate);
 
 /*
  * joinrels.c
@@ -238,6 +248,7 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 					   EquivalenceClass *eclass, Oid opfamily,
 					   int strategy, bool nulls_first);
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
-						List *live_childrels);
+						List *live_childrels,
+						RelAggKind agg_kind);
 
 #endif							/* PATHS_H */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index c8ab0280d2..ac76375b31 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -76,6 +76,8 @@ extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
 extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
 extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
 					   Relids where_needed, bool create_new_ph);
+extern void add_grouped_base_rels_to_query(PlannerInfo *root);
+extern void add_grouped_vars_to_rels(PlannerInfo *root);
 extern void find_lateral_references(PlannerInfo *root);
 extern void create_lateral_join_info(PlannerInfo *root);
 extern List *deconstruct_jointree(PlannerInfo *root);
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 9fa52e1278..68c32e1caa 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -16,7 +16,6 @@
 
 #include "nodes/relation.h"
 
-
 extern TargetEntry *tlist_member(Expr *node, List *targetlist);
 extern TargetEntry *tlist_member_ignore_relabel(Expr *node, List *targetlist);
 
@@ -41,7 +40,6 @@ extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause,
 						 List *targetList);
 extern List *get_sortgrouplist_exprs(List *sgClauses,
 						List *targetList);
-
 extern SortGroupClause *get_sortgroupref_clause(Index sortref,
 						List *clauses);
 extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref,
@@ -65,6 +63,13 @@ extern void split_pathtarget_at_srfs(PlannerInfo *root,
 						 PathTarget *target, PathTarget *input_target,
 						 List **targets, List **targets_contain_srfs);
 
+/* TODO Find the best location (position and in some cases even file) for the
+ * following ones. */
+extern List *replace_grouped_vars_with_aggrefs(PlannerInfo *root, List *src);
+extern void add_grouped_vars_to_target(PlannerInfo *root, PathTarget *target,
+						   List *expressions);
+extern GroupedVar *get_grouping_expression(List *gvis, Expr *expr);
+
 /* Convenience macro to get a PathTarget with valid cost/width fields */
 #define create_pathtarget(root, tlist) \
 	set_pathtarget_cost_width(root, make_pathtarget_from_tlist(tlist))
diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h
index 43c53b5344..5a795c3231 100644
--- a/src/include/optimizer/var.h
+++ b/src/include/optimizer/var.h
@@ -36,5 +36,7 @@ extern bool contain_vars_of_level(Node *node, int levelsup);
 extern int	locate_var_of_level(Node *node, int levelsup);
 extern List *pull_var_clause(Node *node, int flags);
 extern Node *flatten_join_alias_vars(PlannerInfo *root, Node *node);
+extern GroupedVarInfo *find_grouped_var_info(PlannerInfo *root,
+					  GroupedVar *gvar);
 
 #endif							/* VAR_H */
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 95e44280c4..3a14fc6036 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -213,6 +213,9 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
 						   Node *hashkey, double nbuckets,
 						   Selectivity *mcv_freq,
 						   Selectivity *bucketsize_frac);
+extern Size estimate_hashagg_tablesize(Path *path,
+						   const AggClauseCosts *agg_costs,
+						   double dNumGroups);
 
 extern List *deconstruct_indexquals(IndexPath *path);
 extern void genericcostestimate(PlannerInfo *root, IndexPath *path,
diff --git a/src/test/regress/expected/agg_pushdown.out b/src/test/regress/expected/agg_pushdown.out
new file mode 100644
index 0000000000..09b380d21f
--- /dev/null
+++ b/src/test/regress/expected/agg_pushdown.out
@@ -0,0 +1,316 @@
+BEGIN;
+CREATE TABLE agg_pushdown_parent (
+	i int primary key);
+CREATE TABLE agg_pushdown_child1 (
+	j int primary key,
+	parent int references agg_pushdown_parent,
+	v double precision);
+CREATE INDEX ON agg_pushdown_child1(parent);
+CREATE TABLE agg_pushdown_child2 (
+	k int primary key,
+	parent int references agg_pushdown_parent,
+	v double precision);
+INSERT INTO agg_pushdown_parent(i)
+SELECT n
+FROM generate_series(0, 7) AS s(n);
+INSERT INTO agg_pushdown_child1(j, parent, v)
+SELECT 64 * i + n, i, random()
+FROM generate_series(0, 63) AS s(n), agg_pushdown_parent;
+INSERT INTO agg_pushdown_child2(k, parent, v)
+SELECT 64 * i + n, i, random()
+FROM generate_series(0, 63) AS s(n), agg_pushdown_parent;
+ANALYZE;
+SET enable_agg_pushdown TO on;
+-- Perform scan of a table and partially aggregate the result.
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Finalize HashAggregate
+   Group Key: p.i
+   ->  Hash Join
+         Hash Cond: (p.i = c1.parent)
+         ->  Seq Scan on agg_pushdown_parent p
+         ->  Hash
+               ->  Partial HashAggregate
+                     Group Key: c1.parent
+                     ->  Seq Scan on agg_pushdown_child1 c1
+(9 rows)
+
+-- Scan index on agg_pushdown_child1(parent) column and partially aggregate
+-- the result using AGG_SORTED strategy.
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Nested Loop
+         ->  Partial GroupAggregate
+               Group Key: c1.parent
+               ->  Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+         ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+               Index Cond: (i = c1.parent)
+(8 rows)
+
+SET enable_seqscan TO on;
+-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2
+-- and partially aggregate the result.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                            QUERY PLAN                                             
+---------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Sort
+         Sort Key: p.i
+         ->  Nested Loop
+               ->  Partial HashAggregate
+                     Group Key: c1.parent
+                     ->  Nested Loop
+                           ->  Seq Scan on agg_pushdown_child1 c1
+                           ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+                                 Index Cond: (k = c1.j)
+                                 Filter: (c1.parent = parent)
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+                     Index Cond: (i = c1.parent)
+(14 rows)
+
+-- The same for hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Sort
+         Sort Key: p.i
+         ->  Hash Join
+               Hash Cond: (p.i = c1.parent)
+               ->  Seq Scan on agg_pushdown_parent p
+               ->  Hash
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Hash Join
+                                 Hash Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k))
+                                 ->  Seq Scan on agg_pushdown_child1 c1
+                                 ->  Hash
+                                       ->  Seq Scan on agg_pushdown_child2 c2
+(15 rows)
+
+-- The same for merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                            QUERY PLAN                                             
+---------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Merge Join
+         Merge Cond: (c1.parent = p.i)
+         ->  Sort
+               Sort Key: c1.parent
+               ->  Partial HashAggregate
+                     Group Key: c1.parent
+                     ->  Merge Join
+                           Merge Cond: (c1.j = c2.k)
+                           Join Filter: (c1.parent = c2.parent)
+                           ->  Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                           ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+         ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(14 rows)
+
+-- Generic grouping expression.
+EXPLAIN (COSTS off)
+SELECT p.i / 2, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i / 2;
+                                               QUERY PLAN                                                
+---------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: (((c1.parent / 2)))
+   ->  Sort
+         Sort Key: (((c1.parent / 2)))
+         ->  Merge Join
+               Merge Cond: (c1.parent = p.i)
+               ->  Sort
+                     Sort Key: c1.parent
+                     ->  Partial HashAggregate
+                           Group Key: (c1.parent / 2), c1.parent, c2.parent
+                           ->  Merge Join
+                                 Merge Cond: (c1.j = c2.k)
+                                 Join Filter: (c1.parent = c2.parent)
+                                 ->  Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                                 ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(16 rows)
+
+-- The same tests for parallel plans.
+RESET ALL;
+SET parallel_setup_cost TO 0;
+SET parallel_tuple_cost TO 0;
+SET min_parallel_table_scan_size TO 0;
+SET min_parallel_index_scan_size TO 0;
+SET max_parallel_workers_per_gather TO 4;
+SET enable_agg_pushdown TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Finalize HashAggregate
+   Group Key: p.i
+   ->  Gather
+         Workers Planned: 2
+         ->  Parallel Hash Join
+               Hash Cond: (c1.parent = p.i)
+               ->  Partial HashAggregate
+                     Group Key: c1.parent
+                     ->  Parallel Seq Scan on agg_pushdown_child1 c1
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on agg_pushdown_parent p
+(11 rows)
+
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Nested Loop
+               ->  Partial GroupAggregate
+                     Group Key: c1.parent
+                     ->  Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+                     Index Cond: (i = c1.parent)
+(10 rows)
+
+SET enable_seqscan TO on;
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                               QUERY PLAN                                                
+---------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Sort
+               Sort Key: p.i
+               ->  Nested Loop
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Nested Loop
+                                 ->  Parallel Seq Scan on agg_pushdown_child1 c1
+                                 ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+                                       Index Cond: (k = c1.j)
+                                       Filter: (c1.parent = parent)
+                     ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+                           Index Cond: (i = c1.parent)
+(16 rows)
+
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Sort
+         Sort Key: p.i
+         ->  Gather
+               Workers Planned: 1
+               ->  Parallel Hash Join
+                     Hash Cond: (p.i = c1.parent)
+                     ->  Parallel Seq Scan on agg_pushdown_parent p
+                     ->  Parallel Hash
+                           ->  Partial HashAggregate
+                                 Group Key: c1.parent
+                                 ->  Parallel Hash Join
+                                       Hash Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k))
+                                       ->  Parallel Seq Scan on agg_pushdown_child1 c1
+                                       ->  Parallel Hash
+                                             ->  Parallel Seq Scan on agg_pushdown_child2 c2
+(17 rows)
+
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+                                                    QUERY PLAN                                                    
+------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: p.i
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Merge Join
+               Merge Cond: (c1.parent = p.i)
+               ->  Sort
+                     Sort Key: c1.parent
+                     ->  Partial HashAggregate
+                           Group Key: c1.parent
+                           ->  Merge Join
+                                 Merge Cond: (c1.j = c2.k)
+                                 Join Filter: (c1.parent = c2.parent)
+                                 ->  Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                                 ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+               ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(16 rows)
+
+EXPLAIN (COSTS off)
+SELECT p.i / 2, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i / 2;
+                                                       QUERY PLAN                                                       
+------------------------------------------------------------------------------------------------------------------------
+ Finalize GroupAggregate
+   Group Key: (((c1.parent / 2)))
+   ->  Sort
+         Sort Key: (((c1.parent / 2)))
+         ->  Gather
+               Workers Planned: 2
+               ->  Merge Join
+                     Merge Cond: (c1.parent = p.i)
+                     ->  Sort
+                           Sort Key: c1.parent
+                           ->  Partial HashAggregate
+                                 Group Key: (c1.parent / 2), c1.parent, c2.parent
+                                 ->  Merge Join
+                                       Merge Cond: (c1.j = c2.k)
+                                       Join Filter: (c1.parent = c2.parent)
+                                       ->  Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1
+                                       ->  Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2
+                     ->  Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p
+(18 rows)
+
+ROLLBACK;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 16f979c8d9..6d406c65cc 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -98,6 +98,9 @@ test: rules psql_crosstab amutils
 test: select_parallel
 test: write_parallel
 
+# this one runs parallel workers too
+test: agg_pushdown
+
 # no relation related tests can be put in this group
 test: publication subscription
 
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 42632be675..f480c7aaa0 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -139,6 +139,7 @@ test: rules
 test: psql_crosstab
 test: select_parallel
 test: write_parallel
+test: agg_pushdown
 test: publication
 test: subscription
 test: amutils
diff --git a/src/test/regress/sql/agg_pushdown.sql b/src/test/regress/sql/agg_pushdown.sql
new file mode 100644
index 0000000000..05e2f5504f
--- /dev/null
+++ b/src/test/regress/sql/agg_pushdown.sql
@@ -0,0 +1,137 @@
+BEGIN;
+
+CREATE TABLE agg_pushdown_parent (
+	i int primary key);
+
+CREATE TABLE agg_pushdown_child1 (
+	j int primary key,
+	parent int references agg_pushdown_parent,
+	v double precision);
+
+CREATE INDEX ON agg_pushdown_child1(parent);
+
+CREATE TABLE agg_pushdown_child2 (
+	k int primary key,
+	parent int references agg_pushdown_parent,
+	v double precision);
+
+INSERT INTO agg_pushdown_parent(i)
+SELECT n
+FROM generate_series(0, 7) AS s(n);
+
+INSERT INTO agg_pushdown_child1(j, parent, v)
+SELECT 64 * i + n, i, random()
+FROM generate_series(0, 63) AS s(n), agg_pushdown_parent;
+
+INSERT INTO agg_pushdown_child2(k, parent, v)
+SELECT 64 * i + n, i, random()
+FROM generate_series(0, 63) AS s(n), agg_pushdown_parent;
+
+ANALYZE;
+
+SET enable_agg_pushdown TO on;
+
+-- Perform scan of a table and partially aggregate the result.
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+-- Scan index on agg_pushdown_child1(parent) column and partially aggregate
+-- the result using AGG_SORTED strategy.
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+SET enable_seqscan TO on;
+
+-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2
+-- and partially aggregate the result.
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- The same for hash join.
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- The same for merge join.
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+-- Generic grouping expression.
+EXPLAIN (COSTS off)
+SELECT p.i / 2, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i / 2;
+
+-- The same tests for parallel plans.
+RESET ALL;
+
+SET parallel_setup_cost TO 0;
+SET parallel_tuple_cost TO 0;
+SET min_parallel_table_scan_size TO 0;
+SET min_parallel_index_scan_size TO 0;
+SET max_parallel_workers_per_gather TO 4;
+
+SET enable_agg_pushdown TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+SET enable_seqscan TO off;
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
+AS c1 ON c1.parent = p.i GROUP BY p.i;
+
+SET enable_seqscan TO on;
+
+SET enable_nestloop TO on;
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+SET enable_nestloop TO off;
+SET enable_hashjoin TO on;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+SET enable_hashjoin TO off;
+SET enable_mergejoin TO on;
+SET enable_seqscan TO off;
+
+EXPLAIN (COSTS off)
+SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
+
+EXPLAIN (COSTS off)
+SELECT p.i / 2, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
+agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
+c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i / 2;
+
+ROLLBACK;
