POC: GROUP BY optimization

Started by Teodor Sigaev over 7 years ago · 172 messages
#1 Teodor Sigaev
teodor@sigaev.ru
2 attachment(s)

Hi!

As far as I know, columns in GROUP BY can be reordered without loss of
correctness, and reordering can give a performance benefit. The suggested patch
implements that in several ways:

1) Reorder GROUP BY columns by number of distinct values, in descending order.
A simple example shows a twofold performance difference (see
opt_group_by_demostrator.sql; on my notebook the first two queries demonstrate
that). The idea is: if the first column is unique, the sort comparator will
never need to compare the following columns.
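Outside the planner, the effect in 1) can be sketched with a standalone C program (not part of the patch; the row layout, the 7919 multiplier, and all names below are invented for the demonstration). It counts how often a two-column sort comparator must fall through to the tiebreak column: with the unique column first, the tiebreak is essentially never reached; with the two-valued column first, it is reached constantly.

```c
#include <stdlib.h>

#define NROWS 1000

typedef struct
{
	int		col1;			/* unique values: a permutation of 0..NROWS-1 */
	int		col2;			/* only two distinct values */
} Row;

/* how often the sort had to look at the second (tiebreak) column */
static long tiebreak_compares = 0;

/* comparator with the unique column first: col2 is reached only on col1 ties */
static int
cmp_unique_first(const void *a, const void *b)
{
	const Row  *ra = a, *rb = b;

	if (ra->col1 != rb->col1)
		return (ra->col1 > rb->col1) ? 1 : -1;
	tiebreak_compares++;
	return (ra->col2 > rb->col2) - (ra->col2 < rb->col2);
}

/* comparator with the two-valued column first: col1 is reached on every tie */
static int
cmp_dup_first(const void *a, const void *b)
{
	const Row  *ra = a, *rb = b;

	if (ra->col2 != rb->col2)
		return (ra->col2 > rb->col2) ? 1 : -1;
	tiebreak_compares++;
	return (ra->col1 > rb->col1) - (ra->col1 < rb->col1);
}

/* sort NROWS synthetic rows and report how many tiebreak compares happened */
static long
count_tiebreaks(int (*cmp) (const void *, const void *))
{
	static Row	rows[NROWS];

	for (int i = 0; i < NROWS; i++)
	{
		rows[i].col1 = (i * 7919) % NROWS;	/* 7919 is coprime to NROWS */
		rows[i].col2 = i % 2;
	}
	tiebreak_compares = 0;
	qsort(rows, NROWS, sizeof(Row), cmp);
	return tiebreak_compares;
}
```

On a typical qsort implementation, count_tiebreaks(cmp_unique_first) stays near zero while count_tiebreaks(cmp_dup_first) runs into the thousands; this is the same effect the demonstrator script shows at the SQL level.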

2) Reorder GROUP BY columns to match existing pathkeys that result from an
index scan or an ORDER BY clause. This avoids adding a Sort node, which I
suppose is a clear win.

3) The patch suggests using an index for the GROUP BY clause, but unlike the
ORDER BY or merge-join cases it doesn't pay attention to the actual order of
the columns, because of 2).
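The prefix matching behind 2) and 3) can be modelled in a few lines of standalone C, with small integers standing in for canonical PathKey pointers (a hypothetical model only; the patch itself works on Lists of PathKey and SortGroupClause nodes, and keeps the unmatched tail in its original order rather than swapping):

```c
#include <stdbool.h>

/*
 * Move group keys that match the path's leading sort order to the front,
 * and return how many of them matched. A return value equal to ngroup
 * means no Sort node would be needed at all.
 */
static int
reorder_by_pathkeys(const int *path, int npath, int *group, int ngroup)
{
	int			n = 0;

	for (int i = 0; i < npath && n < ngroup; i++)
	{
		bool		found = false;

		/* search the not-yet-matched part of the group keys */
		for (int j = n; j < ngroup; j++)
		{
			if (group[j] == path[i])
			{
				int			tmp = group[j];

				group[j] = group[n];
				group[n] = tmp;
				found = true;
				break;
			}
		}
		if (!found)
			break;				/* path key not in GROUP BY: stop matching */
		n++;
	}
	return n;
}
```

For a path sorted by (p, v) and GROUP BY v, p, c this yields the order p, v, c with two pre-sorted keys, so only the tail needs a sort; the nested loops are also where the N^2 behaviour mentioned in 4) below comes from.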

The patch doesn't change any cost estimation computation, although 1) could
take advantage of it.

Some issues/problems/notices:

1) I didn't touch GROUPING SETS at all. As far as I can see, the current
coding prevents any optimization around them, and I suppose that should be
addressed in a separate patch.

2) I'm not completely satisfied with how the number of groups per column is
counted; it looks ugly without refactoring estimate_num_groups(). As it stands,
that function can be called multiple times for each column. Maybe this number
should be added to the PathKey structure?

3) EquivalenceClass->ec_sortref. If the planner first encounters a column in a
WHERE clause, it doesn't fill the target reference field because it is unknown
yet. The patch matches PathKeys (group_pathkeys) against SortGroupClauses
(groupClause) and fails in this case. So get_eclass_for_sort_expr() now fills
the ec_sortref field if it isn't initialized yet.

4) The algorithm for reordering columns is proportional to N^2, where N is the
number of columns, but I hope that isn't a problem because the number of GROUP
BY columns usually isn't very big.
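On 1) again: since qsort() is not guaranteed to be stable, the patch's comparator carries the original position as a tiebreak. That trick can be shown in isolation (the struct below mirrors the patch's PathKeyNumGroup, with an int id standing in for the PathKey pointer):

```c
#include <stdlib.h>

typedef struct
{
	int			id;			/* stand-in for the PathKey pointer */
	double		numGroups;	/* estimated number of distinct groups */
	int			order;		/* original position, breaks ties for stability */
} KeyInfo;

/* descending by numGroups; equal estimates keep their original order */
static int
keyinfo_cmp(const void *a, const void *b)
{
	const KeyInfo *ka = a,
			   *kb = b;

	if (ka->numGroups == kb->numGroups)
		return (ka->order > kb->order) ? 1 : -1;
	return (ka->numGroups > kb->numGroups) ? -1 : 1;
}

/* order keys so that the most distinct (best leading sort keys) come first */
static void
sort_keys_by_distinct(KeyInfo *keys, int nkeys)
{
	qsort(keys, nkeys, sizeof(KeyInfo), keyinfo_cmp);
}
```

Sorting {id 1: 100 groups, id 2: 5000, id 3: 100} yields ids 2, 1, 3: the 5000-group key leads, and the two equal estimates stay in their original relative order.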

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

opt_group_by-v5.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet; that's possible if
+				 * the ec was constructed from a WHERE clause, i.e. it has no
+				 * target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..6f61bd20de 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,192 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Find a group clause by its tleSortGroupRef
+ */
+static SortGroupClause*
+find_sort_group_clause_by_sortref(List *groupClauses, Index tleSortGroupRef)
+{
+	ListCell	*cell;
+
+	foreach(cell, groupClauses)
+	{
+		SortGroupClause *sgc = (SortGroupClause*) lfirst(cell);
+
+		if (sgc->tleSortGroupRef == tleSortGroupRef)
+			return sgc;
+	}
+
+	elog(ERROR, "could not find target reference as sort group clause");
+
+	return NULL;
+}
+
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of the given
+ * pathkeys. Returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical and should share the same allocated struct,
+		 * so pointer equality is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = find_sort_group_clause_by_sortref(*group_clauses,
+												pathkey->pk_eclass->ec_sortref);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Work struct for sorting pathkeys by number of groups
+ */
+typedef struct PathKeyNumGroup
+{
+	PathKey	   *key;
+	double		numGroups;
+	int			order; /* just to make qsort stable */
+} PathKeyNumGroup;
+
+static int
+pathkey_numgroups_cmp(const void *a, const void *b)
+{
+	const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
+						  *pkb = (const PathKeyNumGroup*)b;
+
+	if (pka->numGroups == pkb->numGroups)
+		/* make qsort stable */
+		return (pka->order > pkb->order) ? 1 : -1;
+	return (pka->numGroups > pkb->numGroups) ? -1 : 1;
+}
+
+/*
+ * Order the tail of the group pathkey list by descending distinctness to
+ * speed up sorting. Returns newly allocated lists; old ones stay untouched.
+ */
+void
+sort_group_key_by_distinct(PlannerInfo *root, double nrows, List *targetList,
+						   List **groupPathkeys, List **groupClauses,
+						   int startNum)
+{
+	PathKeyNumGroup	   *keys;
+	int					nkeys = list_length(*groupPathkeys) - startNum;
+	List			   *pathkeyExprList,
+					   *newGroupPathkeys = NIL,
+					   *newGroupClauses = NIL;
+	ListCell		   *cell;
+	int					i = 0;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	keys = palloc(nkeys * sizeof(*keys));
+	pathkeyExprList = list_make1(NULL);
+
+	/*
+	 * For each pathkey not already supported by an underlying index (or
+	 * something else), estimate its number of groups for the descending sort
+	 */
+	for_each_cell(cell, list_nth_cell(*groupPathkeys, startNum))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+		Node			*pathkeyExpr;
+		SortGroupClause	*sgc;
+
+		sgc = find_sort_group_clause_by_sortref(*groupClauses,
+												pathkey->pk_eclass->ec_sortref);
+		pathkeyExpr = get_sortgroupclause_expr(sgc, targetList);
+		linitial(pathkeyExprList) = pathkeyExpr;
+		keys[i].numGroups= estimate_num_groups(root, pathkeyExprList,
+											   nrows, NULL);
+		keys[i].key = pathkey;
+		keys[i].order = i;
+
+		i++;
+	}
+
+	list_free(pathkeyExprList);
+
+	qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+	i = 0;
+
+	/*
+	 * Construct result value
+	 */
+	foreach(cell, *groupPathkeys)
+	{
+		PathKey	   *key;
+		ListCell   *gcell;
+
+		if (i < startNum)
+			/* presorted head */
+			key = (PathKey *) lfirst(cell);
+		else
+			/* key, sorted above */
+			key = keys[i - startNum].key;
+
+		newGroupPathkeys = lappend(newGroupPathkeys, key);
+
+		/*
+		 * Order GROUP BY clauses the same as the path keys.
+		 * XXX what to do if it's not found?
+		 */
+		foreach(gcell, *groupClauses)
+		{
+			SortGroupClause *sgc = (SortGroupClause*) lfirst(gcell);
+
+			if (key->pk_eclass->ec_sortref == sgc->tleSortGroupRef)
+				newGroupClauses = lappend(newGroupClauses, sgc);
+		}
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the rest of the GROUP BY clauses */
+	newGroupClauses = list_concat_unique_ptr(newGroupClauses, *groupClauses);
+
+	*groupPathkeys = newGroupPathkeys;
+	*groupClauses = newGroupClauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1798,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we don't care about the actual order
+ * of the pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1845,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1883,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..16d607348a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,18 +6182,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						sort_group_key_by_distinct(root,
+												   path->rows,
+												   extra->targetList,
+												   &group_pathkeys,
+												   &group_clauses,
+												   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6213,9 +6237,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6254,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6275,34 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				bool		is_sorted;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (!is_sorted)
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6312,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6323,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,18 +6560,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6540,9 +6593,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6604,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,18 +6618,33 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6584,9 +6652,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6663,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..fb211cc32f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,15 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+extern void sort_group_key_by_distinct(PlannerInfo *root,
+									   double nrows,
+									   List *targetList,
+									   List **groupPathkeys,
+									   List **groupClauses,
+									   int	startNum);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..081ac4ac28 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,152 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns by uniqueness
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Sort
+         Sort Key: p, c, d, v
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..adecec8872 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..b6b2c5c26e 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,72 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns by uniqueness
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
opt_group_by_demostrator.sql (application/sql)
#2 Teodor Sigaev
teodor@sigaev.ru
In reply to: Teodor Sigaev (#1)
1 attachment(s)
Re: POC: GROUP BY optimization

Cosmetic change: remove the find_sort_group_clause_by_sortref() function added
in the v5 patch, because it duplicates the existing get_sortgroupref_clause().

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

opt_group_by-v6.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet; that's possible if
+				 * the ec was constructed from a WHERE clause, i.e. it has no
+				 * target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..ca3c78dbbe 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,171 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of the given
+ * pathkeys. Returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical and should share the same allocated struct,
+		 * so pointer equality is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Work struct for sorting pathkeys by number of groups
+ */
+typedef struct PathKeyNumGroup
+{
+	PathKey	   *key;
+	double		numGroups;
+	int			order; /* just to make qsort stable */
+} PathKeyNumGroup;
+
+static int
+pathkey_numgroups_cmp(const void *a, const void *b)
+{
+	const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
+						  *pkb = (const PathKeyNumGroup*)b;
+
+	if (pka->numGroups == pkb->numGroups)
+		/* make qsort stable */
+		return (pka->order > pkb->order) ? 1 : -1;
+	return (pka->numGroups > pkb->numGroups) ? -1 : 1;
+}
+
+/*
+ * Order the tail of the group pathkey list by descending distinctness to
+ * speed up sorting. Returns newly allocated lists; old ones stay untouched.
+ */
+void
+sort_group_key_by_distinct(PlannerInfo *root, double nrows, List *targetList,
+						   List **groupPathkeys, List **groupClauses,
+						   int startNum)
+{
+	PathKeyNumGroup	   *keys;
+	int					nkeys = list_length(*groupPathkeys) - startNum;
+	List			   *pathkeyExprList,
+					   *newGroupPathkeys = NIL,
+					   *newGroupClauses = NIL;
+	ListCell		   *cell;
+	int					i = 0;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	keys = palloc(nkeys * sizeof(*keys));
+	pathkeyExprList = list_make1(NULL);
+
+	/*
+	 * For each pathkey not already supported by an underlying index (or
+	 * something else), estimate its number of groups for the descending sort
+	 */
+	for_each_cell(cell, list_nth_cell(*groupPathkeys, startNum))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+		Node			*pathkeyExpr;
+		SortGroupClause	*sgc;
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *groupClauses);
+		pathkeyExpr = get_sortgroupclause_expr(sgc, targetList);
+		linitial(pathkeyExprList) = pathkeyExpr;
+		keys[i].numGroups = estimate_num_groups(root, pathkeyExprList,
+											   nrows, NULL);
+		keys[i].key = pathkey;
+		keys[i].order = i;
+
+		i++;
+	}
+
+	list_free(pathkeyExprList);
+
+	qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+	i = 0;
+
+	/*
+	 * Construct result value
+	 */
+	foreach(cell, *groupPathkeys)
+	{
+		PathKey	   *key;
+		ListCell   *gcell;
+
+		if (i < startNum)
+			/* presorted head */
+			key = (PathKey *) lfirst(cell);
+		else
+			/* key, sorted above */
+			key = keys[i - startNum].key;
+
+		newGroupPathkeys = lappend(newGroupPathkeys, key);
+
+		/*
+		 * Order GROUP BY clauses the same as the path keys
+		 * XXX what to do if it's not found?
+		 */
+		foreach(gcell, *groupClauses)
+		{
+			SortGroupClause *sgc = (SortGroupClause*) lfirst(gcell);
+
+			if (key->pk_eclass->ec_sortref == sgc->tleSortGroupRef)
+				newGroupClauses = lappend(newGroupClauses, sgc);
+		}
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the rest of the GROUP BY clauses */
+	newGroupClauses = list_concat_unique_ptr(newGroupClauses, *groupClauses);
+
+	*groupPathkeys = newGroupPathkeys;
+	*groupClauses = newGroupClauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1777,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we don't care about the actual order of
+ * the given pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1824,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1862,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..16d607348a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,18 +6182,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						sort_group_key_by_distinct(root,
+												   path->rows,
+												   extra->targetList,
+												   &group_pathkeys,
+												   &group_clauses,
+												   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6213,9 +6237,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6254,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6275,34 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				bool		is_sorted;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (!is_sorted)
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6312,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6323,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,18 +6560,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6540,9 +6593,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6604,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,18 +6618,33 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					sort_group_key_by_distinct(root,
+											   path->rows,
+											   extra->targetList,
+											   &group_pathkeys,
+											   &group_clauses,
+											   n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6584,9 +6652,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6663,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..fb211cc32f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,15 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+extern void sort_group_key_by_distinct(PlannerInfo *root,
+									   double nrows,
+									   List *targetList,
+									   List **groupPathkeys,
+									   List **groupClauses,
+									   int	startNum);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..081ac4ac28 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,152 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns by uniqueness
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Sort
+         Sort Key: p, c, d, v
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..adecec8872 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..b6b2c5c26e 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,72 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns by uniqueness
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
#3Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#2)
Re: POC: GROUP BY optimization

Hi Teodor,

Thanks for the patch. This (missing) optimization popped up repeatedly
recently, and I was planning to look into it for PG12. So now I don't
have to, because you've done all the hard work ;-)

I have done only a very basic review so far, but ISTM the general
approach of considering additional indexes in create_index_paths is
correct. That being said, I do have a couple of comments:

1) add_path() ensures that we keep only the one cheapest sorted path
for each pathkeys list. This patch somewhat defeats that because it
considers additional pathkeys (essentially any permutation of the group
keys) as interesting. So we may end up with more paths in the list.

I wonder if we should make the add_path() logic smarter to recognize
when two paths have different pathkeys but were generated to match the
same grouping, to reduce the number of paths added by this optimization.
Currently we do that for each pathkeys list independently, but we're
considering many more pathkeys orderings ...

That being said, maybe this concern is moot - to generate large number
of new paths, there would have to be a lot of indexes matching the group
by clause (as a prefix), and the group by clause would need to be fairly
large (to have many permutations). That seems like a fairly rare case. I
do know a couple of people who do create large numbers of indexes on
tables, but those are usually single-column indexes.

2) sort reordering based on ndistinct estimates

This part of the patch (reordering the columns in an explicit sort node)
seems to be largely independent of the "consider other indexes" part. So
I do suggest separating those two, and discussing them independently.

But thinking about this optimization, I'm worried it relies on a couple
of important assumptions. Until now those decisions could be made by
the person writing the SQL query, but this optimization makes that
impossible. So we really need to get this right.

For example, it seems to disregard that different data types have
different comparison costs. For example comparing bytea will be far more
expensive compared to int4, so it may be much more efficient to compare
int4 columns first, even if there are far fewer distinct values in them.

This costing is probably related to abbreviated keys too, which is a
huge improvement for text and other varlena-based types.

Also, simply sorting the columns by their ndistinct estimate is somewhat
naive, because it assumes the columns are independent. Imagine for
example a table with three columns:

CREATE TABLE t (a INT, b INT, c INT);

INSERT INTO t SELECT mod(i,100), mod(i,100)/2, 49*random()
FROM generate_series(1,1000000) s(i);

Clearly, "a" has the most distinct values (100), while both "b" and "c"
have 50 distinct values. But "b" does not actually split any of the "a"
groups, while "c" does:

select count(*) from (select 1 from t group by a,b) foo;
count
-------
100
(1 row)

select count(*) from (select 1 from t group by a,c) foo;
count
-------
5000
(1 row)

So clearly, when evaluating GROUP BY a,b,c it'd be more efficient to use
"(a,c,b)" than "(a,b,c)" but there is no way to decide this merely using
per-column ndistinct values. Luckily, we now have ndistinct multi-column
coefficients which could be used to decide this I believe (but I haven't
tried).

The real issue however is that this decision can't be made entirely
locally. Consider for example this:

explain select a,b,c, count(*) from t group by a,b,c order by c,b,a;
QUERY PLAN
------------------------------------------------------------------------------
Sort (cost=156010.16..156260.16 rows=100000 width=20)
Sort Key: c, b, a
-> GroupAggregate (cost=132154.34..145654.34 rows=100000 width=20)
Group Key: a, c, b
-> Sort (cost=132154.34..134654.34 rows=1000000 width=12)
Sort Key: a, c, b
-> Seq Scan on t (cost=0.00..15406.00 rows=1000000 width=12)
(7 rows)

while without the patch, this would happen:

explain select a,b,c, count(*) from t group by a,b,c order by c,b,a;
QUERY PLAN
------------------------------------------------------------------------
GroupAggregate (cost=132154.34..145654.34 rows=100000 width=20)
Group Key: c, b, a
-> Sort (cost=132154.34..134654.34 rows=1000000 width=12)
Sort Key: c, b, a
-> Seq Scan on t (cost=0.00..15406.00 rows=1000000 width=12)
(5 rows)

Which is clearly cheaper (at least according to the optimizer) than
doing two separate sorts. So the optimization actually does the wrong
thing here, and it needs to somehow consider the other ordering
requirements (in this case ORDER BY) - either by generating multiple
paths with different orderings or some heuristics.

I'm also wondering how freely we can change the group by result
ordering. Assume you disable hash aggregate and parallel query -
currently we are guaranteed to use group aggregate that produces exactly
the ordering as specified in GROUP BY, but this patch removes that
"guarantee" and we can produce arbitrary permutation of the ordering.
But as I argued in other threads, such implicit guarantees are really
fragile, can silently break for arbitrary reasons (say, parallel query
will do just that) and can be easily fixed by adding a proper ORDER BY.
So I don't think this is an issue.

The other random thought is how will/could this interact with the
incremental sorts patch. I know Alexander wanted to significantly limit
where we actually consider the incremental sort, but I'm not sure if
this was one of those places or not (it sure looks like a place where we
would greatly benefit from it).

So to summarize this initial review - I do suggest splitting the patch
into two parts. One that does the index magic, and one for this
reordering optimization. The first part (additional indexes) seems
fairly safe and likely to be committable soon. The other part (ndistinct
reordering) IMHO requires more thought regarding costing and interaction
with other query parts.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#4Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#3)
2 attachment(s)
Re: POC: GROUP BY optimization

Thanks for the patch. This (missing) optimization popped up repeatedly recently,
and I was planning to look into it for PG12. So now I don't have to, because
you've done all the hard work ;-)

You are welcome. Actually, one of our customers faced the problem with GROUP BY
column order, and exactly with reordering without any indexes - what you call
problem 2). The index optimization was noticed by me later. But following your
suggested split, I divided the patch into an index part and a non-index part,
with the second depending on the first. They touch the same part of the code,
so they could not be made independent.

1) add_path() ensures that we keep only the one cheapest sorted path for
each pathkeys list. This patch somewhat defeats that because it considers
additional pathkeys (essentially any permutation of the group keys) as
interesting. So we may end up with more paths in the list.

It seems an index scan could give a benefit here only if:
1) it's an index-only scan, or
2) it's a full index scan (as opposed to index-only) where the physical heap
order is close to the logical index order (i.e. the table is clustered).

In other cases the cost of disk seeks will be very high. But at this stage of
planning we don't know those facts yet, so we can't make a good decision here
and should trust the add_path() logic.

I wonder if we should make the add_path() logic smarter to recognize when two
paths have different pathkeys but were generated to match the same grouping,
to reduce the number of paths added by this optimization. Currently we do
that for each pathkeys list independently, but we're considering many more
pathkeys orderings ...

Hm. I tend to say no.
select .. from t1 group by a, b
union
select .. from t2 group by a, b

t1 and t2 could have different sets of indexes and different distributions, so
locally it could be cheaper to use one index (for example, one defined as (b, a)
and the other as (a, b, c, d) - the second is larger), but globally another (for
example, if the second table doesn't have a (b, a) index).

2) sort reordering based on ndistinct estimates

But thinking about this optimization, I'm worried it relies on a couple of
important assumptions. For now those decisions could have be made by the person
writing the SQL query, but this optimization makes that impossible. So we really
need to get this right.

Hm, SQL by design should not be used that way, but, of course, it is :(

For example, it seems to disregard that different data types have different
comparison costs. For example comparing bytea will be far more expensive
compared to int4, so it may be much more efficient to compare int4 columns
first, even if there are far fewer distinct values in them.

As far as I can see, cost_sort() doesn't pay attention to these details, and
it should be a separate patch to improve that.

Also, simply sorting the columns by their ndistinct estimate is somewhat naive,
because it assumes the columns are independent. Imagine for example a table with
three columns:
So clearly, when evaluating GROUP BY a,b,c it'd be more efficient to use
"(a,c,b)" than "(a,b,c)" but there is no way to decide this merely using
per-column ndistinct values. Luckily, we now have ndistinct multi-column
coefficients which could be used to decide this I believe (but I haven't tried).

Agreed, but I don't know how to use it here. Except, maybe:
1) first column - the column with the largest estimated number of groups
2) second column - the one whose pair (first, second) has the largest estimated
number of groups
3) third column - the one whose triple (first, second, third) has the largest ...

But even with that algorithm, ISTM, it could be implemented more cheaply.

The real issue however is that this decision can't be made entirely locally.
Consider for example this:

explain select a,b,c, count(*) from t group by a,b,c order by c,b,a;

Which is clearly cheaper (at least according to the optimizer) than doing two
separate sorts. So the optimization actually does the wrong thing here, and it
needs to somehow consider the other ordering requirements (in this case ORDER
BY) - either by generating multiple paths with different orderings or some
heuristics.

Hm, thank you. I consider that a bug in my implementation - the basic idea was
to try to match an already existing or needed order, and only if we fail, or
have an unmatched tail of the pathkey list, to then try to find the cheapest
column order. Fixed in v7 (0002-opt_group_by_index_and_order-v7.patch), though
maybe naively: if we don't have a path pathkey, first try to reorder columns
according to the ORDER BY clause. A test for your case is also added.

I'm also wondering how freely we can change the group by result ordering. Assume
you disable hash aggregate and parallel query - currently we are guaranteed to
use group aggregate that produces exactly the ordering as specified in GROUP BY,
but this patch removes that "guarantee" and we can produce arbitrary permutation
of the ordering. But as I argued in other threads, such implicit guarantees are
really fragile, can silently break for arbitrary reasons (say, parallel query
will do just that) and can be easily fixed by adding a proper ORDER BY. So I
don't think this is an issue.

Agree. By design, SQL doesn't guarantee any particular order without an
explicit ORDER BY clause.

The other random thought is how will/could this interact with the incremental
sorts patch. I know Alexander wanted to significantly limit where we actually
consider the incremental sort, but I'm not sure if this was one of those places
or not (it sure looks like a place where we would greatly benefit from it).

Seems they should not directly interact: this patch tries to find the cheapest
column order, while Alexander's patch tries to make the sort itself cheaper -
they are different tasks. But I will try.

So to summarize this initial review - I do suggest splitting the patch into two
parts. One that does the index magic, and one for this reordering optimization.
The first part (additional indexes) seems quite fairly safe, likely to get
committable soon. The other part (ndistinct reordering) IMHO requires more
thought regarding costing and interaction with other query parts.

Thank you for review!

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

0002-opt_group_by_index_and_order-v7.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4f93afdebc..8897157817 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,6 +380,128 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+/*
+ * Work struct for sorting pathkeys
+ */
+typedef struct PathKeyNumGroup
+{
+	PathKey	   *key;
+	double		numGroups;
+	int			order; /* just to make qsort stable */
+} PathKeyNumGroup;
+
+static int
+pathkey_numgroups_cmp(const void *a, const void *b)
+{
+	const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
+						  *pkb = (const PathKeyNumGroup*)b;
+
+	if (pka->numGroups == pkb->numGroups)
+		/* make qsort stable */
+		return (pka->order > pkb->order) ? 1 : -1;
+	return (pka->numGroups > pkb->numGroups) ? -1 : 1;
+}
+
+/*
+ * Order the tail of the group pathkeys list by descending uniqueness; that
+ * speeds up sorting. Returns newly allocated lists, old ones stay untouched.
+ * startNum defines the head of the list whose order must be preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List *target_list,
+							  List **group_pathkeys, List **group_clauses,
+							  int startNum)
+{
+	PathKeyNumGroup	   *keys;
+	int					nkeys = list_length(*group_pathkeys) - startNum;
+	List			   *pathkeyExprList,
+					   *new_group_pathkeys = NIL,
+					   *new_group_clauses = NIL;
+	ListCell		   *cell;
+	int					i = 0;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Will try to match ORDER BY pathkeys in hope that one sort is cheaper than
+	 * two
+	 */
+	if (startNum == 0 && root->sort_pathkeys)
+	{
+		startNum = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+												  group_pathkeys,
+												  group_clauses);
+
+		nkeys = list_length(*group_pathkeys) - startNum;
+		if (nkeys < 2)
+			return; /* nothing to do */
+	}
+
+	keys = palloc(nkeys * sizeof(*keys));
+	pathkeyExprList = list_make1(NULL);
+
+	/*
+	 * For each pathkey to be reordered, estimate its number of groups so we
+	 * can sort the pathkeys in descending order of that estimate
+	 */
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, startNum))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+		Node			*pathkeyExpr;
+		SortGroupClause	*sgc;
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		pathkeyExpr = get_sortgroupclause_expr(sgc, target_list);
+		linitial(pathkeyExprList) = pathkeyExpr;
+		keys[i].numGroups= estimate_num_groups(root, pathkeyExprList,
+											   nrows, NULL);
+		keys[i].key = pathkey;
+		keys[i].order = i;
+
+		i++;
+	}
+
+	list_free(pathkeyExprList);
+
+	qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+
+	/*
+	 * Construct result lists
+	 */
+	i = 0;
+	foreach(cell, *group_pathkeys)
+	{
+		PathKey	   *pathkey;
+		SortGroupClause *sgc;
+
+		if (i < startNum)
+			/* presorted head */
+			pathkey = (PathKey *) lfirst(cell);
+		else
+			/* pathkey, sorted above */
+			pathkey = keys[i - startNum].key;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1e7809edf2..257aa5889c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6208,11 +6208,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  extra->targetList,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6286,6 +6295,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
@@ -6560,11 +6575,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6611,11 +6634,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 226b293622..c52a7c6133 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -193,6 +193,12 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List *target_list,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index e302dfbdce..f954790ea1 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2075,15 +2075,94 @@ SELECT
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 VACUUM btg;
 ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Sort
+         Sort Key: p, c, d, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
--- GROUP BY optimization by reorder columns by index scan
+VACUUM btg;
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
                    QUERY PLAN                   
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..adecec8872 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 7ef703f3a7..061e3360c0 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -919,18 +919,44 @@ SELECT
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 
 VACUUM btg;
 ANALYZE btg;
 
+-- GROUP BY optimization by reorder columns by frequency
+
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
+VACUUM btg;
 
--- GROUP BY optimization by reorder columns by index scan
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
 
0001-opt_group_by_index-v7.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet; that's possible if the
+				 * ec was constructed from a WHERE clause, i.e. it has no
+				 * target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..4f93afdebc 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of the given
+ * pathkeys. Returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey it tries to find corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * A pathkey should reuse the same allocated struct, so equality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1664,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we don't care about the actual order
+ * of the pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1711,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1749,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..1e7809edf2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,9 +6182,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Prevent group pathkey reordering; just check whether the
+				 * path's pathkeys match the group pathkeys in the same order
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6192,7 +6211,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6213,9 +6232,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6249,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6270,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6299,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6310,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,9 +6547,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6531,7 +6563,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6540,9 +6572,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6583,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,17 +6597,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6584,9 +6623,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6634,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..226b293622 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..e302dfbdce 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,107 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..7ef703f3a7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,57 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
#5Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#4)
Re: POC: GROUP BY optimization

On 06/05/2018 07:56 PM, Teodor Sigaev wrote:

Thanks for the patch. This (missing) optimization has popped up repeatedly
recently, and I was planning to look into it for PG12. So now I don't
have to, because you've done all the hard work ;-)

You are welcome. Actually, one of our customers faced the problem with
GROUP BY column order, specifically with reordering without any indexes,
which is what you call problem 2). I noticed the index optimization
later. But based on your suggested order I split the patch into an index
part and a non-index part, where the second part depends on the first.
They touch the same parts of the code and cannot be independent.

The way I see it the patch does two different things:

a) consider additional indexes by relaxing the pathkeys check

b) if there are no indexes, i.e. when an explicit Sort is needed,
consider reordering the columns by ndistinct

Not sure why those two parts couldn't be separated. I haven't tried
splitting the patch, of course, so I may be missing something.

In the worst case, one part will depend on the other, which is OK. It
still allows us to commit the first part and continue working on the
other one, for example.

1) add_path() ensures that we only keep the one cheapest path sorted
path for each pathkeys. This patch somewhat defeats that because it
considers additional pathkeys (essentially any permutation of group
keys) as interesting. So we may end up with more paths in the list.

Seems, an index scan could give benefits here only if:
  1) it's an index-only scan
  2) it's a full index scan (as opposed to index-only) where the physical
     order of the heap is close to the logical index order (i.e. the table
     is clustered)

In other cases the cost of disk seeks will be very high. But at this stage
of planning we don't know these facts yet, so we can't make a good decision
here and should trust the add_path() logic.

Not sure what you mean? Surely we do costing of the paths at this stage,
so we can decide which one is cheaper etc. The decision which paths to
keep is done by add_path(), and it should stay like this, of course. I
wasn't suggesting to move the logic elsewhere.

I wonder if we should make the add_path() logic smarter to recognize when
two paths have different pathkeys but were generated to match the same
grouping, to reduce the number of paths added by this optimization.
Currently we do that for each pathkeys list independently, but we're
considering many more pathkeys orderings ...

Hm. I tend to say no.
select .. from t1 group by a, b
union
select .. from t2 group by a, b

t1 and t2 could have different sets of indexes and different
distributions, so locally it could be cheaper to use one index (for
example, one defined as (b, a) while a second is defined as (a, b, c, d),
which is larger), but globally another choice wins (for example, if the
second table has no (b, a) index).

But add_path() treats each of the relations independently, why couldn't
we pick a different index for each of the two relations?

2) sort reordering based on ndistinct estimates

But thinking about this optimization, I'm worried it relies on a
couple of important assumptions. Until now those decisions could be
made by the person writing the SQL query, but this optimization makes
that impossible. So we really need to get this right.

Hm, SQL by design should not be used that way, but of course it is :(

Well, yes and no. I'm not worried about people relying on us to give
them some ordering - they can (and should) add an ORDER BY clause to fix
that. I'm more worried about the other stuff.

For example, it seems to disregard that different data types have
different comparison costs. For example comparing bytea will be far
more expensive compared to int4, so it may be much more efficient to
compare int4 columns first, even if there are far fewer distinct
values in them.

As far as I can see, cost_sort() doesn't pay attention to these details,
and improving that should be a separate patch.

But sort also does not reorder columns.

Imagine you have a custom data type that is expensive for comparisons.
You know that, so you place it at the end of GROUP BY clauses, to reduce
the number of comparisons on that field. And then we come along and just
reorder the columns, placing it first, because it happens to have a high
ndistinct statistic. And there's no way to get the original behavior :-(

Also, simply sorting the columns by their ndistinct estimate is
somewhat naive, because it assumes the columns are independent.
Imagine for example a table with three columns:
So clearly, when evaluating GROUP BY a,b,c it'd be more efficient to
use "(a,c,b)" than "(a,b,c)" but there is no way to decide this merely
using per-column ndistinct values. Luckily, we now have ndistinct
multi-column coefficients which could be used to decide this I believe
(but I haven't tried).

Agreed, but I don't know how to use it here. Except, maybe:
1) first column: the column with the largest estimated number of groups
2) second column: the pair (first, second) with the largest estimated
number of groups
3) third column: the triple (first, second, third) with the largest ...

But it seems, ISTM, that even this algorithm could be implemented more
cheaply.

Maybe. I do have some ideas, although I haven't tried implementing it.

If there's no extended statistic on the columns, you can do the current
thing (assuming independence etc.). There's not much we can do here.

If there's an extended statistic, you can do either a greedy search (get
the next column with the highest ndistinct coefficient) or exhaustive
search (computing the estimated number of comparisons).
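
To make the greedy variant concrete, here is a minimal Python sketch (not the planner's code; the multi-column ndistinct numbers are invented for a case where b is functionally dependent on a and c is independent of both):

```python
# Hypothetical ndistinct estimates: b is (nearly) functionally dependent
# on a, while c is independent of both.
NDISTINCT = {
    frozenset("a"): 1000,
    frozenset("b"): 100,
    frozenset("c"): 10,
    frozenset("ab"): 1000,    # adding b after a creates no new groups
    frozenset("ac"): 10000,
    frozenset("bc"): 1000,
    frozenset("abc"): 10000,
}

def greedy_group_by_order(columns):
    """At each step pick the column that maximizes the estimated number
    of distinct prefixes (the multi-column ndistinct estimate)."""
    chosen, remaining = [], set(columns)
    while remaining:
        best = max(remaining,
                   key=lambda c: NDISTINCT[frozenset(chosen + [c])])
        chosen.append(best)
        remaining.remove(best)
    return chosen

print(greedy_group_by_order("abc"))  # ['a', 'c', 'b']
```

Note that it prefers (a, c, b) over (a, b, c): per-column ndistinct alone would have put b second, even though b adds no new groups after a.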

Another challenge is that using only the ndistinct coefficient assumes
uniform distribution of the values. But you can have a column with 1M
distinct values, where a single value represents 99% of the rows. And
another column with 100k distinct values, with actual uniform
distribution. I'm pretty sure it'd be more efficient to place the 100k
column first.
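
A quick way to quantify that: the probability that the sort comparator must fall through to the next column is the chance that two random rows tie on this one, i.e. the sum of squared value frequencies. A sketch with the two hypothetical columns just described:

```python
def tie_probability(value_probs):
    """Chance two independently drawn rows are equal in this column,
    i.e. the comparator has to examine the next column as well."""
    return sum(p * p for p in value_probs)

# 1M distinct values, but one value covers 99% of the rows
skewed = [0.99] + [0.01 / 999_999] * 999_999
# 100k distinct values, uniformly distributed
uniform = [1 / 100_000] * 100_000

print(tie_probability(skewed))   # ~0.98
print(tie_probability(uniform))  # ~0.00001
```

So although the skewed column has 10x more distinct values, comparisons on it tie ~98% of the time, while the uniform 100k column resolves all but ~0.001% of comparisons, which is why placing it first should be cheaper.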

The real issue however is that this decision can't be made entirely
locally. Consider for example this:

    explain select a,b,c, count(*) from t group by a,b,c order by c,b,a;

Which is clearly cheaper (at least according to the optimizer) than
doing two separate sorts. So the optimization actually does the wrong
thing here, and it needs to somehow consider the other ordering
requirements (in this case ORDER BY) - either by generating multiple
paths with different orderings or some heuristics.

Hm, thank you. I consider this a bug in my implementation: the basic idea
was to try to match an already existing or required order first, and only
if that fails, or leaves an unmatched tail of the pathkey list, to look
for the cheapest column order.
Fixed in v7 (0002-opt_group_by_index_and_order-v7.patch), though perhaps
naively: if the path has no usable pathkeys, we first try to reorder the
columns according to the ORDER BY clause. A test for your case is also added.

OK. I'll give it a try.

I'm also wondering how freely we can change the group by result
ordering. Assume you disable hash aggregate and parallel query -
currently we are guaranteed to use group aggregate that produces
exactly the ordering as specified in GROUP BY, but this patch removes
that "guarantee" and we can produce arbitrary permutation of the
ordering. But as I argued in other threads, such implicit guarantees
are really fragile, can silently break for arbitrary reasons (say,
parallel query will do just that) and can be easily fixed by adding a
proper ORDER BY. So I don't think this is an issue.

Agreed. SQL by design doesn't guarantee any particular order without an
explicit ORDER BY clause.

The other random thought is how will/could this interact with the
incremental sorts patch. I know Alexander wanted to significantly
limit where we actually consider the incremental sort, but I'm not
sure if this was one of those places or not (it sure looks like a
place where we would greatly benefit from it).

It seems they should not interact directly. This patch tries to find the
cheapest column order, while Alexander's patch tries to make the sort
itself cheaper; they are different tasks. But I will try.

Thanks.

So to summarize this initial review - I do suggest splitting the patch
into two parts. One that does the index magic, and one for this
reordering optimization. The first part (additional indexes) seems
quite fairly safe, likely to get committable soon. The other part
(ndistinct reordering) IMHO requires more thought regarding costing
and interaction with other query parts.

Thank you for review!

;-)

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#6Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#5)
2 attachment(s)
Re: POC: GROUP BY optimization

problem 2). I noticed the index optimization later. But based on your
suggested order I split the patch into an index part and a non-index part,
where the second part depends on the first. They touch the same parts of
the code and cannot be independent.

The way I see it the patch does two different things:

Yes

a) consider additional indexes by relaxing the pathkeys check

Yes, but not only that. The patch can also reorder the GROUP BY clause to
match existing pathkeys, which may come from an index scan (suggested by
the patch or not) or from some other way of getting ordered output.

b) if there are no indexes, i.e. when an explicit Sort is needed, consider
reordering the columns by ndistinct

Yes. But again, this description is a bit short. First, it runs after the
first patch and may receive some preordered leading pathkeys. Second, it
tries to match the ORDER BY clause order if there are no preordered leading
pathkeys from the first patch (introduced in v7). And third, if a tail of
pathkeys remains unmatched after the previous stages, it reorders that tail.

In the worst case, one part will depend on the other, which is OK. It still
allows us to commit the first part and continue working on the other one, for
example.

That is exactly our case. Of course, it's possible to split the first patch
in two again: just the index suggestion (1.1) and reordering by existing
pathkeys (1.2). Then 1.1 would be independent, but 2 would still have to be
applied after 1.2. And patch 1.1 on its own is rather useless.

can decide which one is cheaper etc. The decision which paths to keep is done by
add_path(), and it should stay like this, of course. I wasn't suggesting to move
the logic elsewhere.

Cool, I have no intention of modifying it either.

> I wonder if we should make the add_path() logic smarter to recognize when two
> paths have different pathkeys but were generated to match the same grouping,
> to reduce the number of paths added by this optimization. Currently we do that
> for each pathkeys list independently, but we're considering many more pathkeys
> orderings ...

Hm. I tend to say no.
select .. from t1 group by a, b
union
select .. from t2 group by a, b

t1 and t2 could have different sets of indexes and different distributions,
so locally it could be cheaper to use one index (for example, one defined
as (b, a) while a second is defined as (a, b, c, d), which is larger), but
globally another choice wins (for example, if the second table has no
(b, a) index).

But add_path() treats each of the relations independently, why couldn't we pick
a different index for each of the two relations?

Having sorted output from both subselects could be cheaper than sorting
one of them, even if a different index scan was locally cheaper. But it
seems to me that is not the business of the optimization suggested here.

For example, it seems to disregard that different data types have different
comparison costs. For example comparing bytea will be far more expensive
compared to int4, so it may be much more efficient to compare int4 columns
first, even if there are far fewer distinct values in them.

As far as I can see, cost_sort() doesn't pay attention to these details,
and improving that should be a separate patch.

But sort also does not reorder columns.

Yes. But the estimated cost of the comparison function, and the estimated
number of unique values in a column, may not be very accurate, so the
planner could make a wrong choice. I have seen a 2x difference in a
real-world application. Again, improving the sort cost estimation is a
separate task.

Imagine you have a custom data type that is expensive for comparisons. You know
that, so you place it at the end of GROUP BY clauses, to reduce the number of
comparisons on that field. And then we come along and just reorder the columns,
placing it first, because it happens to have a high ndistinct statistic. And
there's no way to get the original behavior :-(

Hm, that could happen, but I don't know how to improve things here. The
current cost_sort() returns the same cost for any column order.

Okay, at this point we know an estimate of nrows; we could somehow look up
the comparison function's OID and fetch its procost field. And then what?
We would also need to know the tuple width here, and would end up mostly
repeating cost_sort().
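
Roughly, such an estimate might look like the following back-of-the-envelope sketch (this is not cost_sort(); the nrows, ndistinct and procost values are invented, and columns are assumed uniform and independent):

```python
import math

def sort_comparison_cost(nrows, columns):
    """Expected comparator cost for sorting nrows rows.

    Each column contributes about nrows * log2(nrows) comparator calls,
    scaled by the probability that all earlier columns compared equal,
    times its per-call cost (procost)."""
    total, p_prefix_ties = 0.0, 1.0
    ncalls = nrows * math.log2(nrows)
    for ndistinct, procost in columns:
        total += ncalls * p_prefix_ties * procost
        p_prefix_ties *= 1.0 / ndistinct  # chance this column ties too
    return total

# (ndistinct, procost): a cheap int4-like column vs. an expensive
# custom comparator with more distinct values
cheap_first = [(100, 1.0), (1000, 100.0)]
expensive_first = [(1000, 100.0), (100, 1.0)]
print(sort_comparison_cost(10_000, cheap_first) <
      sort_comparison_cost(10_000, expensive_first))  # True
```

Under this toy model the cheap column goes first despite its lower ndistinct, which matches the earlier observation that ordering purely by ndistinct can backfire when comparison costs differ.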

Another option is introducing an enable_groupby_reorder GUC, although
personally I don't like the idea of adding a new GUC variable.

Agreed, but I don't know how to use it here. Except, maybe:
1) first column: the column with the largest estimated number of groups
2) second column: the pair (first, second) with the largest estimated
number of groups
3) third column: the triple (first, second, third) with the largest ...

But it seems, ISTM, that even this algorithm could be implemented more
cheaply.

Maybe. I do have some ideas, although I haven't tried implementing it.

Implemented, please have a look.

If there's no extended statistic on the columns, you can do the current thing
(assuming independence etc.). There's not much we can do here.

Agree

If there's an extended statistic, you can do either a greedy search (get the
next column with the highest ndistinct coefficient) or exhaustive search
(computing the estimated number of comparisons).

Another challenge is that using only the ndistinct coefficient assumes uniform
distribution of the values. But you can have a column with 1M distinct values,
where a single value represents 99% of the rows. And another column with 100k
distinct values, with actual uniform distribution. I'm pretty sure it'd be more
efficient to place the 100k column first.

Interesting. I will think about it, thank you.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

0001-opt_group_by_index-v8.patchtext/x-patch; name=0001-opt_group_by_index-v8.patchDownload
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet, it's possible if ec was
+				 * constructed from WHERE clause, ie it doesn't have target
+				 * reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..4f93afdebc 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
+ * returns new lists,  original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey it tries to find corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkey should use the same allocated struct, so, equiality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1664,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1711,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1749,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..1e7809edf2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,9 +6182,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * prevent group pathkey rreordering, just check the same
+				 * order paths pathkeys and group pathkeys
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6192,7 +6211,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6213,9 +6232,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6249,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6270,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6299,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6310,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,9 +6547,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6531,7 +6563,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6540,9 +6572,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6583,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,17 +6597,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6584,9 +6623,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6634,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..226b293622 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..e302dfbdce 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,107 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..7ef703f3a7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,57 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
0002-opt_group_by_index_and_order-v8.patchtext/x-patch; name=0002-opt_group_by_index_and_order-v8.patchDownload
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8897157817..39f7409d75 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,46 +380,30 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * Work struct from sorting pathkeys
- */
-typedef struct PathKeyNumGroup
-{
-	PathKey	   *key;
-	double		numGroups;
-	int			order; /* just to make qsort stable */
-} PathKeyNumGroup;
-
-static int
-pathkey_numgroups_cmp(const void *a, const void *b)
-{
-	const PathKeyNumGroup *pka = (const PathKeyNumGroup*)a,
-						  *pkb = (const PathKeyNumGroup*)b;
-
-	if (pka->numGroups == pkb->numGroups)
-		/* make qsort stable */
-		return (pka->order > pkb->order) ? 1 : -1;
-	return (pka->numGroups > pkb->numGroups) ? -1 : 1;
-}
-
 /*
 * Order the tail of the list of group pathkeys by uniqueness in descending
 * order, which speeds up sorting. Returns newly allocated lists; the old
 * ones stay untouched.
- * startNum defines a head of list which order should be prevented.
+ * n_preordered defines the length of the head of the list whose order must
+ * be preserved.
  */
 void
 get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 							  List *target_list,
 							  List **group_pathkeys, List **group_clauses,
-							  int startNum)
+							  int n_preordered)
 {
-	PathKeyNumGroup	   *keys;
-	int					nkeys = list_length(*group_pathkeys) - startNum;
-	List			   *pathkeyExprList,
-					   *new_group_pathkeys = NIL,
-					   *new_group_clauses = NIL;
-	ListCell		   *cell;
-	int					i = 0;
+	struct
+	{
+		PathKey			*pathkey;
+		SortGroupClause	*sgc;
+		Node			*pathkeyExpr;
+	}
+				   *keys, tmp;
+	int				nkeys = list_length(*group_pathkeys) - n_preordered;
+	List		   *pathkeyExprList = NIL,
+				   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL;
+	ListCell	   *cell;
+	int				i = 0, n_keys_to_est;
 
 	if (nkeys < 2)
 		return; /* nothing to do */
@@ -428,48 +412,82 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 	 * Will try to match ORDER BY pathkeys in hope that one sort is cheaper than
 	 * two
 	 */
-	if (startNum == 0 && root->sort_pathkeys)
+	if (n_preordered == 0 && root->sort_pathkeys)
 	{
-		startNum = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
-												  group_pathkeys,
-												  group_clauses);
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
 
-		nkeys = list_length(*group_pathkeys) - startNum;
+		nkeys = list_length(*group_pathkeys) - n_preordered;
 		if (nkeys < 2)
 			return; /* nothing to do */
 	}
 
 	keys = palloc(nkeys * sizeof(*keys));
-	pathkeyExprList = list_make1(NULL);
 
 	/*
-	 * for each pathkeys to be ordered we count a number of group to sort them
-	 * in descending order
+	 * Collect information about each pathkey for later use
 	 */
-	for_each_cell(cell, list_nth_cell(*group_pathkeys, startNum))
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
 	{
 		PathKey			*pathkey = (PathKey *) lfirst(cell);
-		Node			*pathkeyExpr;
-		SortGroupClause	*sgc;
-
-		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
-									  *group_clauses);
-		pathkeyExpr = get_sortgroupclause_expr(sgc, target_list);
-		linitial(pathkeyExprList) = pathkeyExpr;
-		keys[i].numGroups= estimate_num_groups(root, pathkeyExprList,
-											   nrows, NULL);
-		keys[i].key = pathkey;
-		keys[i].order = i;
 
+		keys[i].pathkey = pathkey;
+		keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											  *group_clauses);
+		keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+													   target_list);
 		i++;
 	}
 
-	list_free(pathkeyExprList);
+	/*
+	 * Find the cheapest column order for the sort. First pick the single
+	 * column with the largest number of groups, then the best pair (whose
+	 * first member is already fixed by the first step), then the best
+	 * triple, and so on.
+	 */
+	for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+	{
+		ListCell   *tail_cell;
+		int			best_i = 0;
+		double		best_est_num_groups = -1;
 
-	qsort(keys, nkeys, sizeof(*keys), pathkey_numgroups_cmp);
+		/* expand the list of columns and remember its last cell */
+		pathkeyExprList = lappend(pathkeyExprList, NULL);
+		tail_cell = list_tail(pathkeyExprList);
+
+		/*
+		 * Find the best next column - "best" meaning the largest number of
+		 * groups; the previous columns are already chosen
+		 */
+		for(i = n_keys_to_est - 1; i < nkeys; i++)
+		{
+			double  est_num_groups;
+
+			lfirst(tail_cell) = keys[i].pathkeyExpr;
+			est_num_groups = estimate_num_groups(root, pathkeyExprList,
+												 nrows, NULL);
+
+			if (est_num_groups > best_est_num_groups)
+			{
+				best_est_num_groups = est_num_groups;
+				best_i = i;
+			}
+		}
+
+		/* Save the best choice */
+		lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+		if (best_i != n_keys_to_est - 1)
+		{
+			tmp = keys[n_keys_to_est - 1];
+			keys[n_keys_to_est - 1] = keys[best_i];
+			keys[best_i] = tmp;
+		}
+	}
+	list_free(pathkeyExprList);
 
 	/*
-	 * Construct result lists
+	 * Construct the result lists; the keys array is already ordered to
+	 * produce the cheapest sort
 	 */
 	i = 0;
 	foreach(cell, *group_pathkeys)
@@ -477,17 +495,19 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 		PathKey	   *pathkey;
 		SortGroupClause *sgc;
 
-		if (i < startNum)
-			/* presorted head */
+		if (i < n_preordered)
+		{
 			pathkey = (PathKey *) lfirst(cell);
+			sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+										  *group_clauses);
+		}
 		else
-			/* pathkey, sorted above */
-			pathkey = keys[i - startNum].key;
+		{
+			pathkey = keys[i - n_preordered].pathkey;
+			sgc = keys[i - n_preordered].sgc;
+		}
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-
-		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
-									  *group_clauses);
 		new_group_clauses = lappend(new_group_clauses, sgc);
 
 		i++;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f954790ea1..31dcf70e47 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2071,7 +2071,8 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int AS e -- same as d, but with no correlation to p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
@@ -2108,9 +2109,9 @@ SELECT count(*) FROM btg GROUP BY v, p, c;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: p, c, v
+   Group Key: p, v, c
    ->  Sort
-         Sort Key: p, c, v
+         Sort Key: p, v, c
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2130,9 +2131,9 @@ SELECT count(*) FROM btg GROUP BY v, p, d, c;
           QUERY PLAN          
 ------------------------------
  GroupAggregate
-   Group Key: p, c, d, v
+   Group Key: p, v, d, c
    ->  Sort
-         Sort Key: p, c, d, v
+         Sort Key: p, v, d, c
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2158,6 +2159,52 @@ SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
          ->  Seq Scan on btg
 (5 rows)
 
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
 -- GROUP BY optimization by reordering columns to match an index scan
 CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index adecec8872..a686a75fb0 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -244,9 +244,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: a, d, c, b
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: a, d, c, b
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -255,9 +255,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: b, c, d
+   Group Key: b, d, c
    ->  Sort
-         Sort Key: b, c, d
+         Sort Key: b, d, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 061e3360c0..e4415c8d84 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -915,7 +915,8 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int AS e -- same as d, but with no correlation to p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
@@ -950,6 +951,22 @@ SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
 
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
 -- GROUP BY optimization by reordering columns to match an index scan
 
 CREATE INDEX ON btg(p, v);
#7Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#6)
Re: POC: GROUP BY optimization

On 06/06/2018 08:04 PM, Teodor Sigaev wrote:

problem 2). The index optimization was noticed by me later. But following
the order of your suggested patch, I split the patch into an index part
and a non-index part, and the second part depends on the first one. They
touch the same part of the code and cannot be independent.

The way I see it the patch does two different things:

Yes

a) consider additional indexes by relaxing the pathkeys check

Yes, but not only. The patch can reorder the GROUP BY clause to match
existing pathkeys, which could come from an index scan (suggested by the
patch or not) or from some other way of getting ordered output.

b) if there are no indexes, i.e. when an explicit Sort is needed,
consider reordering the columns by ndistinct

Yes. But again, this description is a bit short. First, it runs after the
first patch and might receive some preordered leading pathkeys. Second, it
tries to match the ORDER BY clause order if there are no preordered
leading pathkeys from the first patch (this was introduced in v7). And
third, if a tail of pathkeys is left unmatched by the previous stages, it
will reorder that tail.

OK, I haven't looked at v7 yet, but if I understand correctly it tries
to maintain the ordering as much as possible? Does that actually help? I
mean, the incremental sort patch allows the sorting to happen by pieces,
but here we still need to sort all the data, right?

Can you give an example demonstrating the benefit?

In the worst case, one part will depend on the other, which is OK. It
still allows us to commit the first part and continue working on the
other one, for example.

Exactly, that's our case. Of course, it's possible to split the first
patch in two again: just the index suggestion (1.1) and reordering by
existing pathkeys (1.2). Then 1.1 would be independent, but 2 would still
have to be applied after 1.2. And patch 1.1 on its own is rather useless.

can decide which one is cheaper etc. The decision which paths to keep
is done by add_path(), and it should stay like this, of course. I
wasn't suggesting to move the logic elsewhere.

Cool, I have no intention of modifying it either.

I wonder if we should make the add_path() logic smarter to recognize when
two paths have different pathkeys but were generated to match the same
grouping, to reduce the number of paths added by this optimization.
Currently we do that for each pathkeys list independently, but we're
considering many more pathkeys orderings ...

Hm. I tend to say no.
select .. from t1 group by a, b
union
select .. from t2 group by a, b

t1 and t2 could have different sets of indexes and different
distributions, so locally it could be cheaper to use one index (for
example, one defined as (b,a) and a second as (a,b,c,d) - the second is
larger), but globally another (example: the second table doesn't have a
(b,a) index).

But add_path() treats each of the relations independently, why
couldn't we pick a different index for each of the two relations?

Having sorted output from both subselects could be cheaper than sorting
one of them, even if the index scan itself was cheaper. But it seems to me
that this is not the business of the optimization suggested here.

OK, I think I understand what you're trying to say. We may have a query
like this:

SELECT a, SUM(x) FROM
(
SELECT a, b, COUNT(c) AS x FROM t1 GROUP BY a, b
UNION ALL
SELECT a, b, COUNT(c) AS x FROM t2 GROUP BY a, b
) foo GROUP BY a;

and indexes on (a,b) and (b,a) for both relations. The "deduplication"
by pathkeys I suggested would mean we might keep only index (a,b) on t1
and (b,a) on t2, which means the grouping by "a" can't leverage index
scans on both relations. But if we keep paths for both indexes on each
relation, we can.

That's a good point, yes.

For example, it seems to disregard that different data types have
different comparison costs. For example comparing bytea will be far
more expensive compared to int4, so it may be much more efficient to
compare int4 columns first, even if there are far fewer distinct
values in them.

As far as I can see, cost_sort() doesn't pay attention to these details.
And improving that should be a separate patch.

But sort also does not reorder columns.

Yes. But the estimates of the comparison function's cost and of the
number of unique values in a column may not be very accurate, and so the
planner could make a wrong choice.

Isn't "estimation of cost of comparing function/number of unique values
in column could be not very accurate and so planner could make a wrong
choice" more of an argument against relying on it when doing these
optimizations?

FWIW it's one of the arguments Tom made in the incremental sort patch,
which relies on it too when computing cost of the incremental sort. I'm
sure it's going to be an obstacle there too.

I saw a 2x difference in a real-world application. Again, improving sort
cost estimation is a separate task.

Sure. But we also need to ask the other question, i.e. how many people
would be negatively affected by the optimization. And I admit I don't
know the answer to that, the next example is entirely made up.

Imagine you have a custom data type that is expensive for comparisons.
You know that, so you place it at the end of GROUP BY clauses, to
reduce the number of comparisons on that field. And then we come along
and just reorder the columns, placing it first, because it happens to
have a high ndistinct statistic. And there's no way to get the
original behavior :-(

Hm, that could be, but I don't know how to improve it here. Current
cost_sort() will return the same cost for any column order.

Okay, here we know an estimate of nrows, and we could somehow find the
comparison function's OID and look up its procost field. And then? We
would also need to know the tuple width here and would mostly repeat
cost_sort().

Another option is introducing an enable_groupby_reorder GUC variable,
although personally I don't like the idea of adding a new GUC variable.

I don't like the idea of yet another GUC either, as it's rather blunt
instrument. Which is why I'm suggesting to split the patch into two
parts. Then we can apply the index stuff (which seems relatively
straightforward) and keep working on this second part.

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Agreed, but I don't know how to use it here. Except, maybe:
1) first column - the column with the largest estimated number of groups
2) second column - the pair (first, second) with the largest estimated
number of groups
3) third column - the triple (first, second, third) with the largest ...

But it seems that even that algorithm, ISTM, could be implemented more
cheaply.

Maybe. I do have some ideas, although I haven't tried implementing it.

Implemented, pls, have a look.

If there's no extended statistic on the columns, you can do the
current thing (assuming independence etc.). There's not much we can do
here.

Agree

If there's an extended statistic, you can do either a greedy search
(get the next column with the highest ndistinct coefficient) or
exhaustive search (computing the estimated number of comparisons).

Another challenge is that using only the ndistinct coefficient assumes
uniform distribution of the values. But you can have a column with 1M
distinct values, where a single value represents 99% of the rows. And
another column with 100k distinct values, with actual uniform
distribution. I'm pretty sure it'd be more efficient to place the 100k
column first.

Interesting. Will think about it, thank you.

You're welcome.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#8Claudio Freire
klaussfreire@gmail.com
In reply to: Tomas Vondra (#7)
Re: POC: GROUP BY optimization

On Wed, Jun 6, 2018 at 5:43 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

For example, it seems to disregard that different data types have
different comparison costs. For example comparing bytea will be far
more expensive compared to int4, so it may be much more efficient to
compare int4 columns first, even if there are far fewer distinct
values in them.

as I can see cost_sort() doesn't pay attention to this details. And
it should be a separated patch to improve that.

But sort also does not reorder columns.

Yes. But estimation of cost of comparing function/number of unique
values in column could be not very accurate and so planner could make
a wrong choice.

...

Imagine you have a custom data type that is expensive for comparisons.
You know that, so you place it at the end of GROUP BY clauses, to
reduce the number of comparisons on that field. And then we come along
and just reorder the columns, placing it first, because it happens to
have a high ndistinct statistic. And there's no way to get the
original behavior :-(

Hm. that it could be, but I don't know how to improve here. Current
cost_sort() will return the same cost for any columns order.

Okay, here we know an estimation of nrow, we could somehow find a
comparing function oid and find a its procost field. And then? we also
need to know width of tuple here and mostly repeat the cost_sort.

Another option is an introducing enable_groupby_reorder GUC variable
although personally I don't like an idea to add new GUC variable.

I don't like the idea of yet another GUC either, as it's rather blunt
instrument. Which is why I'm suggesting to split the patch into two
parts. Then we can apply the index stuff (which seems relatively
straightforward) and keep working on this second part.

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Correct me if I'm wrong, but doesn't this patch concern itself with
precisely sort performance?

As such, estimating sort performance correctly in the various plan
variants being considered seems to be a very central aspect of it.

This means it's pretty much time to consider the effect of operator
cost *and* distinct values in the cost computation.

Assuming cost_sort does its thing, it's just a matter of building the
desired variants and picking the one with the smallest cost_sort. One
can use heuristics to build a subset of all possible column orderings
with a guiding principle that tries to maximize the likelihood of
finding a better order than the one in the query (like sorting by
ndistinct), but I'd suggest:

- Always considering the sort order verbatim as given in the SQL (ie:
what the user requests)
- Relying on cost_sort to distinguish among them, and pick a winner, and
- When in doubt (if cost_sort doesn't produce a clear winner), use the
order given by the user

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

I think that should give a good enough approximation of
ndistinct-enriched sort costs that considers operator cost
appropriately. That operator cost is basically an average and can be
used as a constant, so it's just a matter of passing the right
comparison_cost to cost_sort.

Even if ndistinct estimates are off, the fact that operator cost is
involved should be a good enough tool for the user should the planner
pick the wrong path - it's only a matter of boosting operator cost to
let the planner know that sorting that way is expensive.

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

#9Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Claudio Freire (#8)
Re: POC: GROUP BY optimization

On 06/06/2018 11:22 PM, Claudio Freire wrote:

On Wed, Jun 6, 2018 at 5:43 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

For example, it seems to disregard that different data types have
different comparison costs. For example comparing bytea will be far
more expensive compared to int4, so it may be much more efficient to
compare int4 columns first, even if there are far fewer distinct
values in them.

as I can see cost_sort() doesn't pay attention to this details. And
it should be a separated patch to improve that.

But sort also does not reorder columns.

Yes. But estimation of cost of comparing function/number of unique
values in column could be not very accurate and so planner could make
a wrong choice.

...

Imagine you have a custom data type that is expensive for comparisons.
You know that, so you place it at the end of GROUP BY clauses, to
reduce the number of comparisons on that field. And then we come along
and just reorder the columns, placing it first, because it happens to
have a high ndistinct statistic. And there's no way to get the
original behavior :-(

Hm. that it could be, but I don't know how to improve here. Current
cost_sort() will return the same cost for any columns order.

Okay, here we know an estimation of nrow, we could somehow find a
comparing function oid and find a its procost field. And then? we also
need to know width of tuple here and mostly repeat the cost_sort.

Another option is an introducing enable_groupby_reorder GUC variable
although personally I don't like an idea to add new GUC variable.

I don't like the idea of yet another GUC either, as it's rather blunt
instrument. Which is why I'm suggesting to split the patch into two
parts. Then we can apply the index stuff (which seems relatively
straightforward) and keep working on this second part.

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Correct me if I'm wrong, but doesn't this patch concern itself with
precisely sort performance?

Yes, the second part (reordering pathkeys by ndistinct) does.

As such, estimating sort performance correctly in the various plan
variants being considered seems to be a very central aspect of it.

This means it's pretty much time to consider the effect of operator
cost *and* distinct values in the cost computation.

Yes, until now that was not really needed because the optimizer does not
consider reordering of the pathkeys - it simply grabs either GROUP BY or
ORDER BY pathkeys and runs with that.

So the costing was fairly trivial, we simply do something like

comparison_cost = 2.0 * cpu_operator_cost;

sort_cost = comparison_cost * tuples * LOG2(tuples);

which essentially ignores that there might be multiple columns, or that
the columns may have sort operators with different costs.

The question is how reliable the heuristics can be. The current patch
uses just plain ndistinct, but that seems rather unreliable but I don't
have a clear idea how to improve that - we may have MCV for the columns
and perhaps some extended statistics, but I'm not sure how far we can
run with that.

Essentially what we need to estimate the number of comparisons for each
column, to compute better comparison_cost.

Assuming cost_sort does its thing, it's just a matter of building the
desired variants and picking the one with the smallest cost_sort. One
can use heuristics to build a subset of all possible column orderings
with a guiding principle that tries to maximize the likelihood of
finding a better order than the one in the query (like sorting by
ndistinct), but I'd suggest:

- Always considering the sort order verbatim as given in the SQL (ie:
what the user requests)
- Relying on cost_sort to distinguish among them, and pick a winner, and
- When in doubt (if cost_sort doesn't produce a clear winner), use the
order given by the user

I don't see why not to generate all possible orderings (keeping only the
cheapest path for each pathkey variant) and let the optimizer sort it
out. If the user specified an explicit ORDER BY, we'll slap an extra
Sort at the end, increasing the cost.

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

Sure. The trouble is this also assumes uniform distribution, which is
not always the case.

I think that should give a good enough approximation of
ndistinct-enriched sort costs that considers operator cost
appropriately. That operator cost is basically an average and can be
used as a constant, so it's just a matter of passing the right
comparison_cost to cost_sort.

Even if ndistinct estimates are off, the fact that operator cost is
involved should be a good enough tool for the user should the
planner pick the wrong path - it's only a matter of boosting operator
cost to let the planner know that sorting that way is expensive.

There are far too many "should"s in these two paragraphs.

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

I see no point in doing that, and I don't recall a single place in the
planner where we do that. If the user specified ORDER BY, we'll slap an
explicit Sort on top when needed (which acts as the handicap, but in a
clear way). Otherwise we don't do such things - it'd be just plain
confusing (consider "ORDER BY a,b" vs. "ORDER BY b,c" with same data
types, ndistinct etc. but unexpectedly different costs). Also, what
would be a good value for the handicap?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#10Claudio Freire
klaussfreire@gmail.com
In reply to: Tomas Vondra (#9)
Re: POC: GROUP BY optimization

On Wed, Jun 6, 2018 at 6:57 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

On 06/06/2018 11:22 PM, Claudio Freire wrote:

On Wed, Jun 6, 2018 at 5:43 PM Tomas Vondra
As such, estimating sort performance correctly in the various plan
variants being considered seems to be a very central aspect of it.

This means it's pretty much time to consider the effect of operator
cost *and* distinct values in the cost computation.

Yes, until now that was not really needed because the optimizer does not
consider reordering of the pathkeys - it simply grabs either GROUP BY or
ORDER BY pathkeys and runs with that.

So the costing was fairly trivial, we simply do something like

comparison_cost = 2.0 * cpu_operator_cost;

sort_cost = comparison_cost * tuples * LOG2(tuples);

which essentially ignores that there might be multiple columns, or that
the columns may have sort operator with different costs.

The question is how reliable the heuristics can be. The current patch
uses just plain ndistinct, but that seems rather unreliable but I don't
have a clear idea how to improve that - we may have MCV for the columns
and perhaps some extended statistics, but I'm not sure how far we can
run with that.

Essentially what we need to estimate the number of comparisons for each
column, to compute better comparison_cost.

This could be approached by adjusting statistics by any relevant
restrictions applicable to the columns being grouped, as is done for
selectivity estimations.

I'm not sure how far that would get us, though. It would be rather
easy to lose sight of those restrictions if there are complex
operations involved.

Assuming cost_sort does its thing, it's just a matter of building the
desired variants and picking the one with the smallest cost_sort. One
can use heuristics to build a subset of all possible column orderings
with a guiding principle that tries to maximize the likelihood of
finding a better order than the one in the query (like sorting by
ndistinct), but I'd suggest:

- Always considering the sort order verbatim as given in the SQL (ie:
what the user requests)
- Relying on cost_sort to distinguish among them, and pick a winner, and
- When in doubt (if cost_sort doesn't produce a clear winner), use the
order given by the user

I don't see why not to generate all possible orderings (keeping only the
cheapest path for each pathkey variant) and let the optimizer to sort it
out.

I'm assuming costing the full N! possible orderings would be
prohibitively expensive.

If the user specified an explicit ORDER BY, we'll slap an extra
Sort by at the end, increasing the cost.

I think you misunderstood me. I'm saying the exact opposite.

When I mention handicap, I mean to give the explicitly requested group
by order a *reduced* cost, to give it an advantage over the
heuristics.

This is to try to attack the problem you mentioned, where users already
accounting for operator costs in their queries would get overridden by
the planner, perhaps to the detriment of overall execution time.

In essence:

- If the user requested that order, we assume it "somewhat
subjectively better" (by giving it a slightly reduced cost).
- If there is an explicit order by clause that differs from a
considered path, the required sort node will already penalize it
appropriately, nothing needs to be done in relation to sort costs.
- All other things being equal, cost_sort will make the decision. If a
plan beats the user-provided order in spite of the handicap, it will
be used. So it's still optimizing clear cases.

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

Sure. The trouble is this also assumes uniform distribution, which is
not always the case.

Well, (1.0 / ndistinct) = p(left == right).

If you can compute a better p(left == right) with an MCV, you can get
a better estimate. If you have an MCV. But that'd be a bit expensive,
and I'm not sure it's all that relevant.

To what degree does improving this produce better plans? As long as
average expected cost is relatively well estimated (as in, one sort
order relative to another sort order), it won't affect the decision.

But if needed, the MCV can be used for this.

So, in essence, you need to account for:

- restrictions on that column that constrain the domain
- distribution skew (MCV, nulls)
- ndistinct

To compute p(left == right).

Maybe something as simple as the following?

p_special = sum(mcv_i * mcv_i) + null_frac * null_frac
p_other = (1 - p_special) * (1 - p_special) / ndistinct(restr)

I think that should give a good enough approximation of
ndistinct-enriched sort costs that considers operator cost
appropriately. That operator cost is basically an average and can be
used as a constant, so it's just a matter of passing the right
comparison_cost to cost_sort.
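Putting the two pieces together, the expected per-tuple comparison cost could be sketched as follows (my own sketch, not planner code; the function name and the shape of the input are assumptions):

```python
def expected_comparison_cost(columns):
    """columns: list of (op_cost, p_eq) tuples in grouping order, where
    p_eq is p(left == right) for that column (1/ndistinct in the uniform
    case).  Column N's comparator only runs when all earlier columns
    compared equal, so its cost is weighted by the product of the earlier
    columns' p_eq values."""
    total = 0.0
    p_prefix_equal = 1.0  # probability that all earlier columns were equal
    for op_cost, p_eq in columns:
        total += op_cost * p_prefix_equal
        p_prefix_equal *= p_eq
    return total
```

With equal operator costs, e.g. `[(1.0, 1/1000), (1.0, 1/10)]` costs about 1.001 while the reversed order costs 1.1, which is exactly why putting the high-ndistinct column first tends to win.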

Even if ndistinct estimates are off, the fact that operator cost is
involved should be a good enough tool for the user should the
planner pick the wrong path - it's only a matter of boosting operator
cost to let the planner know that sorting that way is expensive.

There are far too many "should"s in these two paragraphs.

Aren't all planner decisions riddled with "should"s?

Anyway, my point is that, since the user can control operator cost,
and operator cost is involved in the computation, and the ordering
given by the user is explicitly tested for and given a slight
advantage, it should (will?) be easy for the user to overcome any poor
planner decisions by giving the planner the relevant information.

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

I see no point in doing that, and I don't recall a single place in the
planner where we do that. If the user specified ORDER BY, we'll slap an
explicit Sort on top when needed (which acts as the handicap, but in a
clear way). Otherwise we don't do such things - it'd be just plain
confusing (consider "ORDER BY a,b" vs. "ORDER BY b,c" with same data
types, ndistinct etc. but unexpectedly different costs). Also, what
would be a good value for the handicap?

As I said, I'm not talking about explicit order by, but about the case
where the user knows which group by order is the most efficient
somehow.

I'm proposing the planner shouldn't override the user without clear
evidence that the other plan is actually better, above the noise
better. That's what I mean by handicap.

#11Claudio Freire
klaussfreire@gmail.com
In reply to: Claudio Freire (#10)
Re: POC: GROUP BY optimization

On Wed, Jun 6, 2018 at 7:18 PM Claudio Freire <klaussfreire@gmail.com> wrote:

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

Sure. The trouble is this also assumes uniform distribution, which is
not always the case.

Well, (1.0 / ndistinct) = p(left == right).

If you can compute a better p(left == right) with an MCV, you can get
a better estimate. If you have an MCV. But that'd be a bit expensive,
and I'm not sure it's all that relevant.

To what degree does improving this produce better plans? As long as
average expected cost is relatively well estimated (as in, one sort
order relative to another sort order), it won't affect the decision.

But if needed, the MCV can be used for this.

So, in essence, you need to account for:

- restrictions on that column that constrain the domain
- distribution skew (MCV, nulls)
- ndistinct

To compute p(left == right).

Maybe something as simple as the following?

p_special = sum(mcv_i * mcv_i) + null_frac * null_frac
p_other = (1 - p_special) * (1 - p_special) / ndistinct(restr)

Well, that came out with a slew of math errors, but the idea is clear:
compute p(left == right) given the available statistics and
constrained by any applicable restrictions.
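One way to write the intended computation without those errors (a hedged sketch, not planner code; it assumes the non-MCV, non-NULL mass is uniform over the remaining distinct values, and that ndistinct has already been adjusted for any applicable restrictions):

```python
def p_equal(mcv_freqs, null_frac, ndistinct):
    """Estimate p(left == right) for two random rows of a column.

    MCVs and NULLs are handled exactly via their frequencies; the
    remaining frequency mass is spread uniformly over the remaining
    distinct values."""
    p_special = sum(f * f for f in mcv_freqs) + null_frac * null_frac
    rest = 1.0 - sum(mcv_freqs) - null_frac          # non-MCV, non-NULL mass
    n_rest = max(ndistinct - len(mcv_freqs), 1)      # values not in the MCV list
    return p_special + (rest * rest) / n_rest
```

In the purely uniform case (no MCV, no NULLs) this degenerates to the plain `1/ndistinct` estimate; a heavily skewed MCV pushes the probability up, as expected.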

#12Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Claudio Freire (#10)
Re: POC: GROUP BY optimization

On 06/07/2018 12:18 AM, Claudio Freire wrote:

On Wed, Jun 6, 2018 at 6:57 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

On 06/06/2018 11:22 PM, Claudio Freire wrote:

On Wed, Jun 6, 2018 at 5:43 PM Tomas Vondra
As such, estimating sort performance correctly in the various plan
variants being considered seems to be a very central aspect of it.

This means it's pretty much time to consider the effect of operator
cost *and* distinct values in the cost computation.

Yes, until now that was not really needed because the optimizer does not
consider reordering of the pathkeys - it simply grabs either GROUP BY or
ORDER BY pathkeys and runs with that.

So the costing was fairly trivial, we simply do something like

comparison_cost = 2.0 * cpu_operator_cost;

sort_cost = comparison_cost * tuples * LOG2(tuples);

which essentially ignores that there might be multiple columns, or that
the columns may have sort operator with different costs.
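In Python terms the current model reduces to this (a sketch; the constant is just cpu_operator_cost's default value hard-coded here as an assumption):

```python
import math

# Assumed stand-in for the cpu_operator_cost GUC (PostgreSQL's default is 0.0025).
CPU_OPERATOR_COST = 0.0025

def trivial_sort_cost(tuples):
    """The current flat model: one fixed per-comparison cost, times the
    n * log2(n) comparisons of a sort -- no notion of column count,
    per-column operator costs, or ndistinct."""
    comparison_cost = 2.0 * CPU_OPERATOR_COST
    return comparison_cost * tuples * math.log2(tuples)
```

Note that any two GROUP BY orderings of the same input get exactly the same cost under this model, which is why it cannot drive a reordering decision.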

The question is how reliable the heuristics can be. The current patch
uses just plain ndistinct, which seems rather unreliable, and I don't
have a clear idea how to improve that - we may have MCVs for the columns
and perhaps some extended statistics, but I'm not sure how far we can
run with that.

Essentially what we need is to estimate the number of comparisons for
each column, to compute a better comparison_cost.

This could be approached by adjusting statistics by any relevant
restrictions applicable to the columns being grouped, as is done for
selectivity estimations.

I don't follow how is this related to restrictions? Can you elaborate?

I'm not sure how far that would get us, though. It would be rather
easy to lose sight of those restrictions if there are complex
operations involved.

Assuming cost_sort does its thing, it's just a matter of building the
desired variants and picking the one with the smallest cost_sort. One
can use heuristics to build a subset of all possible column orderings
with a guiding principle that tries to maximize the likelihood of
finding a better order than the one in the query (like sorting by
ndistinct), but I'd suggest:

- Always considering the sort order verbatim as given in the SQL (ie:
what the user requests)
- Relying on cost_sort to distinguish among them, and pick a winner, and
- When in doubt (if cost_sort doesn't produce a clear winner), use the
order given by the user

I don't see why not to generate all possible orderings (keeping only the
cheapest path for each pathkey variant) and let the optimizer sort it
out.

I'm assuming costing the full N! possible orderings would be
prohibitively expensive.

That's possible, yes - particularly for large N values. Perhaps there's
a simpler algorithm to compute a sufficient approximation. In a greedy
way, starting from some likely-good combination of columns, or so. I
haven't thought about this part very much ...

If the user specified an explicit ORDER BY, we'll slap an extra
Sort node at the end, increasing the cost.

I think you misunderstood me. I'm saying the exact opposite.

When I mention handicap, I mean to give the explicitly requested group
by order a *reduced* cost, to give it an advantage over the
heuristics.

I did understand you. I'm saying that if the ordering does not match the
one requested by the user (in ORDER BY), we end up adding an explicit
Sort node on top of it, which increases the cost of all the other paths,
acting as a disadvantage.

But now I realize this only works for when there is an ORDER BY clause,
not for GROUP BY (in which case we don't add any Sort node).

This is to try to attack the problem you mentioned where users already
accounting for operator costs in their queries would get overridden by
the planner, perhaps in detriment of overall execution time.

In essence:

- If the user requested that order, we assume it "somewhat
subjectively better" (by giving it a slightly reduced cost).
- If there is an explicit order by clause that differs from a
considered path, the required sort node will already penalize it
appropriately, nothing needs to be done in relation to sort costs.
- All other things being equal, cost_sort will make the decision. If a
plan beats the user-provided order in spite of the handicap, it will
be used. So it's still optimizing clear cases.

OK. I still don't believe this is a good approach, because we don't know
how good our estimate of the costs is, so I have no idea how large
the handicap needs to be.

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

Sure. The trouble is this also assumes uniform distribution, which is
not always the case.

Well, (1.0 / ndistinct) = p(left == right).

If you can compute a better p(left == right) with an MCV, you can get
a better estimate. If you have an MCV. But that'd be a bit expensive,
and I'm not sure it's all that relevant.

To what degree does improving this produce better plans? As long as
average expected cost is relatively well estimated (as in, one sort
order relative to another sort order), it won't affect the decision.

I'd bet I can construct corner-case examples with vastly different
behavior depending on column data distributions, so it's not entirely
insignificant. How important is it in practice I don't know.

But if needed, the MCV can be used for this.

So, in essence, you need to account for:

- restrictions on that column that constrain the domain
- distribution skew (MCV, nulls)
- ndistinct

To compute p(left == right).

Maybe something as simple as the following?

p_special = sum(mcv_i * mcv_i) + null_frac * null_frac
p_other = (1 - p_special) * (1 - p_special) / ndistinct(restr)

I think that should give a good enough approximation of
ndistinct-enriched sort costs that considers operator cost
appropriately. That operator cost is basically an average and can be
used as a constant, so it's just a matter of passing the right
comparison_cost to cost_sort.

Even if ndistinct estimates are off, the fact that operator cost is
involved should be a good enough tool for the user should the
planner pick the wrong path - it's only a matter of boosting operator
cost to let the planner know that sorting that way is expensive.

There are far too many "should"s in these two paragraphs.

Aren't all planner decisions riddled with "should"s?

Anyway, my point is that, since the user can control operator cost,
and operator cost is involved in the computation, and the ordering
given by the user is explicitly tested for and given a slight
advantage, it should (will?) be easy for the user to overcome any poor
planner decisions by giving the planner the relevant information.

I really don't want to force users to tweak operator cost in order to
tune queries. It's about an order of magnitude less convenient than a
GUC, for various reasons.

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

I see no point in doing that, and I don't recall a single place in the
planner where we do that. If the user specified ORDER BY, we'll slap an
explicit Sort on top when needed (which acts as the handicap, but in a
clear way). Otherwise we don't do such things - it'd be just plain
confusing (consider "ORDER BY a,b" vs. "ORDER BY b,c" with same data
types, ndistinct etc. but unexpectedly different costs). Also, what
would be a good value for the handicap?

As I said, I'm not talking about explicit order by, but about the case
where the user knows which group by order is the most efficient
somehow.

I'm proposing the planner shouldn't override the user without clear
evidence that the other plan is actually better, above the noise
better. That's what I mean by handicap.

OK, gotcha. But I'm really unsure how large the handicap should be.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#13Claudio Freire
klaussfreire@gmail.com
In reply to: Tomas Vondra (#12)
Re: POC: GROUP BY optimization

On Wed, Jun 6, 2018 at 8:06 PM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

Comparison cost can be approximated probabilistically as:

cost_comp = sum(cost_op_n * (1.0 / ndistinct(col_1_to_n)))

Where cost_op_n is the cost of the comparison function for column N,
and ndistinct(col_1_to_n) is an estimation of the number of distinct
values for columns prior to N in the sort order.

You can approximate ndistinct as the product of ndistinct of previous
columns, or use extended statistics when available.

Sure. The trouble is this also assumes uniform distribution, which is
not always the case.

Well, (1.0 / ndistinct) = p(left == right).

If you can compute a better p(left == right) with an MCV, you can get
a better estimate. If you have an MCV. But that'd be a bit expensive,
and I'm not sure it's all that relevant.

To what degree does improving this produce better plans? As long as
average expected cost is relatively well estimated (as in, one sort
order relative to another sort order), it won't affect the decision.

I'd bet I can construct corner-case examples with vastly different
behavior depending on column data distributions, so it's not entirely
insignificant. How important is it in practice I don't know.

I guess that can only be answered by building that solution and
testing it against the corner cases.

#14Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#7)
Re: POC: GROUP BY optimization

OK, I haven't looked at v7 yet, but if I understand correctly it tries
to maintain the ordering as much as possible? Does that actually help? I
mean, the incremental sort patch allows the sorting to happen by pieces,
but here we still need to sort all the data, right?

Can you give an example demonstrating the benefit?

SELECT a, SUM(x) FROM
(
SELECT a, b, COUNT(c) AS x FROM t1 GROUP BY a, b
UNION ALL
SELECT a, b, COUNT(c) AS x FROM t2 GROUP BY a, b
) foo GROUP BY a;

and indexes on (a,b) and (b,a) for both relations. The "deduplication"
by pathkeys I suggested would mean we might keep only index (a,b) on t1
and (b,a) on t2, which means the grouping by "a" can't leverage index
scans on both relations. But if we keep paths for both indexes on each
relation, we can.

Yes, that's one of the options.

Isn't "estimation of cost of comparing function/number of unique values
in column could be not very accurate and so planner could make a wrong
choice" more of an argument against relying on it when doing these
optimizations?

FWIW it's one of the arguments Tom made in the incremental sort patch,
which relies on it too when computing the cost of the incremental sort. I'm
sure it's going to be an obstacle there too.

I saw a 2x difference in a real-world application. Again, improving
sort cost estimation is a separate task.

Sure. But we also need to ask the other question, i.e. how many people
would be negatively affected by the optimization. And I admit I don't
know the answer to that, the next example is entirely made up.

Hm, it seems the best way here is improving the cost_sort estimation. Will try,
but I think that is a separate patch

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Will do

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#15Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#9)
Re: POC: GROUP BY optimization

So the costing was fairly trivial, we simply do something like

comparison_cost = 2.0 * cpu_operator_cost;

sort_cost = comparison_cost * tuples * LOG2(tuples);

which essentially ignores that there might be multiple columns, or that
the columns may have sort operator with different costs.

Agree. And distribution of keys.

The question is how reliable the heuristics can be. The current patch
uses just plain ndistinct, which seems rather unreliable, and I don't
have a clear idea how to improve that - we may have MCVs for the columns
and perhaps some extended statistics, but I'm not sure how far we can
run with that.

v8 already uses another algorithm.

Essentially what we need is to estimate the number of comparisons for each
column, to compute a better comparison_cost.

Exactly

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

I see no point in doing that, and I don't recall a single place in the
planner where we do that. If the user specified ORDER BY, we'll slap an
explicit Sort on top when needed (which acts as the handicap, but in a
clear way). Otherwise we don't do such things - it'd be just plain
confusing (consider "ORDER BY a,b" vs. "ORDER BY b,c" with same data
types, ndistinct etc. but unexpectedly different costs). Also, what
would be a good value for the handicap?

Again agree. If we have a fixed order of columns (ORDER BY) then we should not
try to reorder it. The current patch follows that, if I didn't make a mistake.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#16Teodor Sigaev
teodor@sigaev.ru
In reply to: Claudio Freire (#10)
Re: POC: GROUP BY optimization

I don't see why not to generate all possible orderings (keeping only the
cheapest path for each pathkey variant) and let the optimizer sort it
out.

I'm assuming costing the full N! possible orderings would be
prohibitively expensive.

That's true, but as a first step we need to improve cost_sort, and only then
try to find the best pathkey order in an optimal way.

- If the user requested that order, we assume it "somewhat
subjectively better" (by giving it a slightly reduced cost).

I don't think so. That's not the SQL way - the DB should determine the optimal
way to execute a query.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#17Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#15)
Re: POC: GROUP BY optimization

On 06/07/2018 03:41 PM, Teodor Sigaev wrote:

... snip ...

Prioritization of the user-provided order can be as simple as giving
that comparison_cost a small handicap.

I see no point in doing that, and I don't recall a single place in the
planner where we do that. If the user specified ORDER BY, we'll slap an
explicit Sort on top when needed (which acts as the handicap, but in a
clear way). Otherwise we don't do such things - it'd be just plain
confusing (consider "ORDER BY a,b" vs. "ORDER BY b,c" with same data
types, ndistinct etc. but unexpectedly different costs). Also, what
would be a good value for the handicap?

Again agree. If we have a fixed order of columns (ORDER BY) then we should
not try to reorder it. The current patch follows that, if I didn't make a mistake.

This part seems to be more a misunderstanding between me and Claudio. I
believe Claudio was referring to the column order in a GROUP BY, not
ORDER BY. In which case we don't add any Sort, of course.

I'm still opposed to adding arbitrary handicap to prioritize the order
specified by user, for the reasons I explained before. We should aim to
make the heuristics/costing reliable enough to make this unnecessary.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#18Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#17)
Re: POC: GROUP BY optimization

Again agree. If we have a fixed order of columns (ORDER BY) then we should not
try to reorder it. The current patch follows that, if I didn't make a mistake.

This part seems to be more a misunderstanding between me and Claudio. I believe
Claudio was referring to the column order in a GROUP BY, not ORDER BY. In which
case we don't add any Sort, of course.

I hope so

I'm still opposed to adding arbitrary handicap to prioritize the order specified
by user, for the reasons I explained before. We should aim to make the
heuristics/costing reliable enough to make this unnecessary.

+1

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#19Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#7)
3 attachment(s)
Re: POC: GROUP BY optimization

Yes. But again, this description is a bit short. First, it works after the
first patch and might get some preordered leading pathkeys. Second, it
tries to match the ORDER BY clause order if there are no preordered leading
pathkeys from the first patch (this was introduced in v7). And third, if there
is a tail of unmatched pathkeys left over from the previous stages, then it
will reorder that tail.

OK, I haven't looked at v7 yet, but if I understand correctly it tries
to maintain the ordering as much as possible? Does that actually help? I
mean, the incremental sort patch allows the sorting to happen by pieces,
but here we still need to sort all the data, right?

Can you give an example demonstrating the benefit?

See tst.sql. Queries are marked with opt (optimization is on) and noopt.

Query 1: select count(*) from btg group by v, r;
Query 2: select count(*) from btg group by n, v, r order by n;

For both queries it's possible to reorder the v and r columns; the n column has a
single distinct value.

On my laptop:
Query 1opt vs 1noopt: 3177.500 ms vs 6604.493 ms
2opt vs 2noopt: 5800.307 ms vs 7486.967 ms

So, what we see:
1) For query 1 the optimization gives 2 times better performance, for query 2 only
30%. If column 'n' were unique then the times for queries 1 and 2 should be the
same. We could add a check for preordered pathkeys in
get_cheapest_group_keys_order() and, if estimate_num_groups(reordered pathkeys)
is close to 1, skip reordering the tail of the pathkeys.

2) Planning cost is the same for all queries. So, cost_sort() doesn't even take
the number of columns into account.

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Added in v9: debug_enable_group_by_match_order_by and
debug_enable_cheapest_group_by. I also checked compatibility with the
incremental sort patch, and all works except a small merge conflict which could
be resolved right before committing.

Next, I had a look at cost_incremental_sort() provided by the incremental sort
patch and, it's a pity, it doesn't solve our problem with the impact of the
per-column comparison function's cost and the number of its calls.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

0001-opt_group_by_index-v9.patchtext/x-patch; name=0001-opt_group_by_index-v9.patchDownload
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet; this is possible if the
+				 * ec was constructed from a WHERE clause, i.e. it doesn't have
+				 * a target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..4f93afdebc 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of pathkeys. The
+ * function returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey it tries to find corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys should use the same allocated struct, so equality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1664,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1711,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1749,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..1e7809edf2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,9 +6182,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * prevent group pathkey reordering; just check that the
+				 * path's pathkeys and group pathkeys are in the same order
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6192,7 +6211,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6213,9 +6232,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6249,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6270,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6299,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6310,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,9 +6547,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6531,7 +6563,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6540,9 +6572,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6583,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,17 +6597,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6584,9 +6623,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6634,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..226b293622 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..e302dfbdce 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,107 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..7ef703f3a7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,57 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
Attachment: 0002-opt_group_by_index_and_order-v9.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4f93afdebc..586f8e6e79 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,6 +380,157 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * Order the tail of the list of group pathkeys by descending uniqueness, which
+ * speeds up sorting. Returns newly allocated lists; the old ones stay untouched.
+ * n_preordered defines the head of the list whose order should be preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List *target_list,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	struct
+	{
+		PathKey			*pathkey;
+		SortGroupClause	*sgc;
+		Node			*pathkeyExpr;
+	}
+				   *keys, tmp;
+	int				nkeys = list_length(*group_pathkeys) - n_preordered;
+	List		   *pathkeyExprList = NIL,
+				   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL;
+	ListCell	   *cell;
+	int				i = 0, n_keys_to_est;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Try to match the ORDER BY pathkeys, in the hope that one sort is cheaper
+	 * than two
+	 */
+	if (debug_group_by_match_order_by &&
+		n_preordered == 0 && root->sort_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
+
+		nkeys = list_length(*group_pathkeys) - n_preordered;
+		if (nkeys < 2)
+			return; /* nothing to do */
+	}
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	keys = palloc(nkeys * sizeof(*keys));
+
+	/*
+	 * Collect information about pathkey for subsequent usage
+	 */
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		keys[i].pathkey = pathkey;
+		keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											  *group_clauses);
+		keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+													   target_list);
+		i++;
+	}
+
+	/*
+	 * Find the cheapest-to-sort order of columns. We first find the column
+	 * with the largest number of groups, then the best pair (the first column
+	 * of the pair is already fixed by the previous step), then the best
+	 * triple, and so on.
+	 */
+	for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+	{
+		ListCell   *tail_cell;
+		int			best_i = 0;
+		double		best_est_num_groups = -1;
+
+		/* expand the list of columns and remember the last cell */
+		pathkeyExprList = lappend(pathkeyExprList, NULL);
+		tail_cell = list_tail(pathkeyExprList);
+
+		/*
+		 * Find the best last column - best meaning the largest number of
+		 * groups; the previous columns are already chosen
+		 */
+		for(i = n_keys_to_est - 1; i < nkeys; i++)
+		{
+			double  est_num_groups;
+
+			lfirst(tail_cell) = keys[i].pathkeyExpr;
+			est_num_groups = estimate_num_groups(root, pathkeyExprList,
+												 nrows, NULL);
+
+			if (est_num_groups > best_est_num_groups)
+			{
+				best_est_num_groups = est_num_groups;
+				best_i = i;
+			}
+		}
+
+		/* Save the best choice */
+		lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+		if (best_i != n_keys_to_est - 1)
+		{
+			tmp = keys[n_keys_to_est - 1];
+			keys[n_keys_to_est - 1] = keys[best_i];
+			keys[best_i] = tmp;
+		}
+	}
+	list_free(pathkeyExprList);
+
+	/*
+	 * Construct the result lists; the keys array is already ordered to
+	 * produce the cheapest sort
+	 */
+	i = 0;
+	foreach(cell, *group_pathkeys)
+	{
+		PathKey	   *pathkey;
+		SortGroupClause *sgc;
+
+		if (i < n_preordered)
+		{
+			pathkey = (PathKey *) lfirst(cell);
+			sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+										  *group_clauses);
+		}
+		else
+		{
+			pathkey = keys[i - n_preordered].pathkey;
+			sgc = keys[i - n_preordered].sgc;
+		}
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1e7809edf2..257aa5889c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6208,11 +6208,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  extra->targetList,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6286,6 +6295,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
@@ -6560,11 +6575,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6611,11 +6634,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee1444c427..4236807433 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1822,7 +1822,26 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
-
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables matching of GROUP BY order to ORDER BY order."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Finds the cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 226b293622..60d8d6c3a6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -193,6 +193,16 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List *target_list,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index e302dfbdce..31dcf70e47 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2071,19 +2071,145 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 VACUUM btg;
 ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
--- GROUP BY optimization by reorder columns by index scan
+VACUUM btg;
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
                    QUERY PLAN                   
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..a686a75fb0 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -244,9 +244,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: a, d, c, b
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: a, d, c, b
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -255,9 +255,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: b, c, d
+   Group Key: b, d, c
    ->  Sort
-         Sort Key: b, c, d
+         Sort Key: b, d, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 7ef703f3a7..e4415c8d84 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -915,22 +915,65 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 
 VACUUM btg;
 ANALYZE btg;
 
+-- GROUP BY optimization by reorder columns by frequency
+
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
+VACUUM btg;
 
--- GROUP BY optimization by reorder columns by index scan
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
 
Attachment: tst.sql (application/sql)
#20Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#19)
Re: POC: GROUP BY optimization

On 06/07/2018 06:22 PM, Teodor Sigaev wrote:

Yes. But again, this description is a bit short. First, it works after
the first patch and might get some preordered leading pathkeys. Second,
it tries to match the ORDER BY clause order if there are no preordered
leading pathkeys from the first patch (this was introduced in v7). And
third, if a tail of pathkeys was left unmatched by the previous stages,
it will reorder that tail.

OK, I haven't looked at v7 yet, but if I understand correctly it tries
to maintain the ordering as much as possible? Does that actually help? I
mean, the incremental sort patch allows the sorting to happen by pieces,
but here we still need to sort all the data, right?

Can you give an example demonstrating the benefit?

See tst.sql. Queries are marked with "opt" (optimization is on) and "noopt".

Query 1: select count(*) from btg group by v, r;
Query 2: select count(*) from btg group by n, v, r order by n;

For both queries it's possible to reorder the v and r columns; the n
column has a single distinct value.

On my laptop:
Query 1opt vs 1noopt: 3177.500 ms vs 6604.493 ms
Query 2opt vs 2noopt: 5800.307 ms vs 7486.967 ms

So, what we see:
1) For query 1 the optimization gives a 2x performance improvement; for
query 2, only 30%. If column 'n' were unique, the times for queries 1
and 2 should be the same. We could add a check for preordered pathkeys
in get_cheapest_group_keys_order(): if estimate_num_groups(reordered
pathkeys) is close to 1, we could skip reordering the tail of pathkeys.

OK, those are nice improvements, although the examples are somewhat
extreme (only 1 or 2 distinct values). So yes, in some cases this
reordering clearly makes a big difference, but I still think we need to
improve the heuristics to minimize regressions.

I see v9 is now calling estimate_num_groups(), so it already benefits
from extended statistics. Nice! I think the algorithm is more or less
the greedy option I proposed in one of the earlier messages - I don't
know if it's sufficient or not, but the only other idea I have is
essentially an exhaustive search through all possible permutations.

So that's a nice improvement, although I think we should also consider
non-uniform distributions (using the per-column MCV lists).

2) The planning cost is the same for all queries, so cost_sort()
doesn't even take the number of columns into account.

Yeah. As I wrote before, I think we'll need to come up with a model for
comparison_cost depending on the column order, which should make costs
for those paths different.

FWIW I think it would be useful to have "development GUC" that would
allow us to enable/disable these options during development, because
that makes experiments much easier. But then remove them before commit.

Added, v9, debug_enable_group_by_match_order_by and
debug_enable_cheapest_group_by. I also checked compatibility with
incremental sort patch, and all works except small merge conflict which
could be resolved right before committing.

OK. I think we should also have a debug GUC for the first patch (i.e.
one that would allow reordering group_pathkeys to match the input path).

Regarding the two GUCs introduced in v9, I'm not sure I 100% understand
what they are doing. Let me try explaining what I think they do:

1) debug_cheapest_group_by - Reorder GROUP BY clauses using ndistinct
stats for the columns, placing columns with more distinct values first (so
that we don't need to compare the following columns as often).

2) debug_group_by_match_order_by - Try to reorder the GROUP BY clauses
to match the ORDER BY, so that the group aggregate produces results in
the desired output (and we don't need an explicit Sort).

FWIW I have my doubts about the debug_group_by_match_order_by optimization. The
trouble is that it's only based on comparing the pathkeys lists, and
entirely ignores that the two result sets may operate on very different
numbers of rows.

Consider for example "SELECT * FROM t GROUP BY a,b,c,d ORDER BY c,d"
where table "t" has 1B rows, but there are only ~10k rows in the result
(which is fairly common in fact tables in star-schema DBs). IIUC the
optimization will ensure "c,d" is placed first in the GROUP BY, and then
we reorder "a,b" using ndistinct. But this may be much less efficient
than simply reordering (a,b,c,d) per ndistinct and then adding explicit
Sort on top of that (because on 10k rows that's going to be cheap).

So I think this somewhat contradicts the order-by-ndistinct optimization
and may easily undo its benefits.

The other "issue" I have with get_cheapest_group_keys_order() is how it
interacts with group_keys_reorder_by_pathkeys(). I mean, we first call

n_preordered = group_keys_reorder_by_pathkeys(path->pathkeys, ...);

so the reordering tries to maintain ordering of the input path. Then if
(n_preordered == 0) we do this:

group_keys_reorder_by_pathkeys(root->sort_pathkeys, ...);

Doesn't that mean that if we happen to have input path with partially
matching ordering (so still requiring explicit Sort before grouping) we
may end up ignoring the ORDER BY entirely? Instead of using Sort that
would match the ORDER BY? I might have misunderstood this, though.

I'm not sure why the first group_keys_reorder_by_pathkeys() call does
not simply return 0 when the input path ordering is not sufficient for
the grouping? Is there some reasoning behind that (e.g. that sorting
partially sorted is faster)?

Overall I think this patch introduces four different optimizations that
are indeed very interesting:

1) consider additional sorted paths for grouping

2) reorder GROUP BY clauses per ndistinct to minimize comparisons

3) when adding Sort for grouping, maintain partial input ordering

4) when adding Sort for grouping, try producing the right output order
(if the ORDER BY was specified)

But at this point it's kinda mashed together in ad-hoc manner, using
very simple heuristics / assumptions. We'll need to figure out how to
combine those optimizations.

BTW I get compiler warnings that n_preordered_groups may be used
uninitialized in add_paths_to_grouping_rel, because it's only set in an
if/else branch, and then used further down.

Next, I had a look at cost_incremental_sort() provided by the
incremental sort patch and, it's a pity, it doesn't solve our problem
with the impact of the per-column comparison function cost and the
number of its calls.

It currently doesn't, but that might be more of an issue in the incremental
sort patch and we may need to fix that. Clearly the costing in general
in that patch needs more work, and I do recall Tom pointing out that the
current heuristic (estimating the number of sort groups using ndistincts)
seems rather weak.

It may not be exactly the same problem, but it seems kinda similar to
what this patch does. I have a hunch that those patches will end up
inventing something fairly similar.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#21Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Tomas Vondra (#20)
Re: POC: GROUP BY optimization

On 06/09/2018 08:09 PM, Tomas Vondra wrote:

/snip/

4) when adding Sort for grouping, try producing the right output order
(if the ORDER BY was specified)

BTW I've just realized we already do something similar in master. If you
run a query like this:

SELECT a, b, count(*) FROM t GROUP BY b, a ORDER BY a;

we will actually plan it like this:

QUERY PLAN
---------------------------
GroupAggregate
Group Key: a, b
-> Sort
Sort Key: a, b
-> Seq Scan on t
(5 rows)

I.e. we already reorder the group clauses to match ORDER BY, to only
require a single sort. This happens in preprocess_groupclause(), which
also explains the reasoning behind that.

I wonder if some of the new code reordering group pathkeys could/should
be moved here (not sure, maybe it's too early for those decisions). In
any case, it might be appropriate to update some of the comments before
preprocess_groupclause() which claim we don't do certain things added by
the proposed patches.

This probably also somewhat refutes my claim that the order of grouping
keys is currently fully determined by users (and so they may pick the
most efficient order), while the reorder-by-ndistinct patch would make
that impossible. Apparently when there's ORDER BY, we already mess with
the order of group clauses - there are ways to get around it (subquery
with OFFSET 0) but it's much less clear.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#22Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#20)
2 attachment(s)
Re: POC: GROUP BY optimization

I see v9 is now calling estimate_num_groups(), so it already benefits
from extended statistics. Nice! I think the algorithm is more or less
the greedy option I proposed in one of the earlier messages - I don't
know if it's sufficient or not, but the only other idea I have is
essentially an exhaustive search through all possible permutations.

Checking all possible permutations could be slow with an extremely large
GROUP BY clause, so we would need some stop limit - like
join_collapse_limit. I don't like this idea.

So that's a nice improvement, although I think we should also consider
non-uniform distributions (using the per-column MCV lists).

Could you clarify how to do that?

2) Planning cost is the same for all queries. So, cost_sort() doesn't
even take the number of columns into account.

Yeah. As I wrote before, I think we'll need to come up with a model for
comparison_cost depending on the column order, which should make costs
for those paths different.

Yeah. But I still think it should be a separate patch, one that improves
cost estimation and plan choice in other parts of the optimizer too.

OK. I think we should also have a debug GUC for the first patch (i.e.
one that would allow reordering group_pathkeys to match the input path).

Added to 0002-opt_group_by_index_and_order-v10.patch as
debug_group_by_reorder_by_pathkeys.

Regarding the two GUCs introduced in v9, I'm not sure I 100% understand
what they are doing. Let me try explaining what I think they do:

1) debug_cheapest_group_by - Reorder GROUP BY clauses to using ndistinct
stats for columns, placing columns with more distinct values first (so
that we don't need to compare the following columns as often).

yes

2) debug_group_by_match_order_by - Try to reorder the GROUP BY clauses
to match the ORDER BY, so that the group aggregate produces results in
the desired output (and we don't need an explicit Sort).

yes

FWIW I have doubts about the debug_group_by_match_order_by optimization. The
trouble is that it's only based on comparing the pathkeys lists, and
entirely ignores that the two result sets may operate on very different
numbers of rows.

Consider for example "SELECT * FROM t GROUP BY a,b,c,d ORDER BY c,d"
where table "t" has 1B rows, but there are only ~10k rows in the result
(which is fairly common in fact tables in star-schema DBs). IIUC the
optimization will ensure "c,d" is placed first in the GROUP BY, and then
we reorder "a,b" using ndistinct. But this may be much less efficient
than simply reordering (a,b,c,d) per ndistinct and then adding explicit
Sort on top of that (because on 10k rows that's going to be cheap).

As you pointed out in your next email, this optimization is already
implemented in preprocess_groupclause(). And correctly resolving this issue
is only possible after implementing a correct cost_sort() - one that pays
attention to comparison cost, the number of distinct values in columns, and
how often the second and subsequent columns need to be compared.

The other "issue" I have with get_cheapest_group_keys_order() is how it
interacts with group_keys_reorder_by_pathkeys(). I mean, we first call

n_preordered = group_keys_reorder_by_pathkeys(path->pathkeys, ...);

so the reordering tries to maintain ordering of the input path. Then if
(n_preordered == 0) we do this:

group_keys_reorder_by_pathkeys(root->sort_pathkeys, ...);

Doesn't that mean that if we happen to have input path with partially
matching ordering (so still requiring explicit Sort before grouping) we
may end up ignoring the ORDER BY entirely? Instead of using Sort that
would match the ORDER BY? I might have misunderstood this, though.

Hm. I believe sorting the input of the GROUP BY clause is always more
expensive - simply because the output of the GROUP BY clause should have
fewer rows than its input, which makes its ordering cheaper. So here it uses
the ORDER BY only if we don't have group pathkey(s) matched by the path
pathkeys.

I'm not sure why the first group_keys_reorder_by_pathkeys() call does
not simply return 0 when the input path ordering is not sufficient for
the grouping? Is there some reasoning behind that (e.g. that sorting
partially sorted is faster)?

Hm, what do you mean by "sufficient"? group_keys_reorder_by_pathkeys()
always tries to order the GROUP BY columns by preexisting pathkeys. But of
course, incremental sort will add more benefit here. I'd like to hope that
the incremental sort patch is close to commit.

Overall I think this patch introduces four different optimizations that
are indeed very interesting:

1) consider additional sorted paths for grouping

Agree

2) reorder GROUP BY clauses per ndistinct to minimize comparisons

Agree (with the help of estimate_num_groups(); the patch tries to maximize
the number of groups at each step)

3) when adding Sort for grouping, maintain partial input ordering

Agree - and the incremental sort patch will be helpful here.

4) when adding Sort for grouping, try producing the right output order
(if the ORDER BY was specified)

Yes.

But at this point it's kinda mashed together in ad-hoc manner, using
very simple heuristics / assumptions. We'll need to figure out how to
combine those optimizations.

The heuristics used here are:
1) if there is some existing order - use it
2) if there is a required order - match that order to prevent an extra Sort node.

The incremental sort patch will improve cases where the order matches only partially.

BTW I get compiler warnings that n_preordered_groups may be used
uninitialized in add_paths_to_grouping_rel, because it's only set in an
if/else branch, and then used further down.

Fixed, but I believe the compiler is just not smart enough here.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

Attachments:

0002-opt_group_by_index_and_order-v10.patchtext/x-patch; name=0002-opt_group_by_index_and_order-v10.patchDownload
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4f93afdebc..ec4095bd2e 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -328,6 +328,12 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
 /*
  * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
  * returns new lists,  original GROUP BY lists stay untouched.
@@ -341,6 +347,9 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	ListCell	*key;
 	int			n;
 
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
 	if (pathkeys == NIL || *group_pathkeys == NIL)
 		return 0;
 
@@ -380,6 +389,157 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+/*
+ * Order the tail of the group pathkeys list by descending uniqueness. This
+ * speeds up sorting. Returns newly allocated lists; the old ones stay untouched.
+ * n_preordered defines the length of the list head whose order must be kept.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List *target_list,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	struct
+	{
+		PathKey			*pathkey;
+		SortGroupClause	*sgc;
+		Node			*pathkeyExpr;
+	}
+				   *keys, tmp;
+	int				nkeys = list_length(*group_pathkeys) - n_preordered;
+	List		   *pathkeyExprList = NIL,
+				   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL;
+	ListCell	   *cell;
+	int				i = 0, n_keys_to_est;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Will try to match ORDER BY pathkeys in hope that one sort is cheaper than
+	 * two
+	 */
+	if (debug_group_by_match_order_by &&
+		n_preordered == 0 && root->sort_pathkeys)
+	{
+		bool _save_debug_group_by_reorder_by_pathkeys =
+			debug_group_by_reorder_by_pathkeys;  /* DEBUG ONLY, to be removed */
+
+		debug_group_by_reorder_by_pathkeys = true;
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
+		debug_group_by_reorder_by_pathkeys = _save_debug_group_by_reorder_by_pathkeys;
+
+		nkeys = list_length(*group_pathkeys) - n_preordered;
+		if (nkeys < 2)
+			return; /* nothing to do */
+	}
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	keys = palloc(nkeys * sizeof(*keys));
+
+	/*
+	 * Collect information about pathkey for subsequent usage
+	 */
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		keys[i].pathkey = pathkey;
+		keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											  *group_clauses);
+		keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+													   target_list);
+		i++;
+	}
+
+	/*
+	 * Find the cheapest to sort order of columns. We will find a first column
+	 * Find the cheapest-to-sort order of columns. We first find the column
+	 * with the biggest number of groups, then the best pair (its first column
+	 * is already fixed by the first step), then the best triple and so on.
+	for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+	{
+		ListCell   *tail_cell;
+		int			best_i = 0;
+		double		best_est_num_groups = -1;
+
+		/* expand the list of columns and remember the last cell */
+		pathkeyExprList = lappend(pathkeyExprList, NULL);
+		tail_cell = list_tail(pathkeyExprList);
+
+		/*
+		 * Find the best next column - best means the biggest number of groups;
+		 * the previous columns are already chosen
+		 */
+		for(i = n_keys_to_est - 1; i < nkeys; i++)
+		{
+			double  est_num_groups;
+
+			lfirst(tail_cell) = keys[i].pathkeyExpr;
+			est_num_groups = estimate_num_groups(root, pathkeyExprList,
+												 nrows, NULL);
+
+			if (est_num_groups > best_est_num_groups)
+			{
+				best_est_num_groups = est_num_groups;
+				best_i = i;
+			}
+		}
+
+		/* Save the best choice */
+		lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+		if (best_i != n_keys_to_est - 1)
+		{
+			tmp = keys[n_keys_to_est - 1];
+			keys[n_keys_to_est - 1] = keys[best_i];
+			keys[best_i] = tmp;
+		}
+	}
+	list_free(pathkeyExprList);
+
+	/*
+	 * Construct result lists, keys array is already ordered to get a cheapest
+	 * sort
+	 */
+	i = 0;
+	foreach(cell, *group_pathkeys)
+	{
+		PathKey	   *pathkey;
+		SortGroupClause *sgc;
+
+		if (i < n_preordered)
+		{
+			pathkey = (PathKey *) lfirst(cell);
+			sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+										  *group_clauses);
+		}
+		else
+		{
+			pathkey = keys[i - n_preordered].pathkey;
+			sgc = keys[i - n_preordered].sgc;
+		}
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the rest GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1e7809edf2..f0e23d3354 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6184,7 +6184,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			bool		is_sorted;
 			List	   *group_pathkeys = root->group_pathkeys,
 					   *group_clauses = parse->groupClause;
-			int			n_preordered_groups;
+			int			n_preordered_groups = 0;
 
 			if (parse->groupingSets)
 			{
@@ -6208,11 +6208,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  extra->targetList,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6286,6 +6295,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
@@ -6560,11 +6575,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6611,11 +6634,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index fa3c8a7905..4175d252c0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1822,7 +1822,35 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
-
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable reorder GROUP BY by pathkeys"),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable matching GROUP BY by ORDER BY."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("find a cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 226b293622..ef91e0dae5 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -193,6 +193,17 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List *target_list,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index e302dfbdce..31dcf70e47 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2071,19 +2071,145 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 VACUUM btg;
 ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
--- GROUP BY optimization by reorder columns by index scan
+VACUUM btg;
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
                    QUERY PLAN                   
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..a686a75fb0 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -244,9 +244,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: a, d, c, b
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: a, d, c, b
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -255,9 +255,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: b, c, d
+   Group Key: b, d, c
    ->  Sort
-         Sort Key: b, c, d
+         Sort Key: b, d, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 7ef703f3a7..e4415c8d84 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -915,22 +915,65 @@ SELECT
 	i/2 AS p,
 	format('%60s', i%2) AS v,
 	i/4 AS c,
-	i/8 AS d
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
 
 VACUUM btg;
 ANALYZE btg;
 
+-- GROUP BY optimization by reorder columns by frequency
+
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
 SET enable_seqscan=off;
 SET enable_bitmapscan=off;
+VACUUM btg;
 
--- GROUP BY optimization by reorder columns by index scan
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY p, v;
 
0001-opt_group_by_index-v10.patchtext/x-patch; name=0001-opt_group_by_index-v10.patchDownload
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b22b36ec0e..c073eb061a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet, it's possible if ec was
+				 * constructed from WHERE clause, ie it doesn't have target
+				 * reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..4f93afdebc 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -26,6 +26,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -327,6 +328,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
+ * returns new lists,  original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey it tries to find corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys should use the same allocated struct, so equality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1611,6 +1664,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1625,6 +1711,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1660,6 +1749,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 67a2c7a581..1e7809edf2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6182,9 +6182,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * prevent reordering of group pathkeys: with grouping sets,
+				 * just check whether the path's pathkeys match the group
+				 * pathkeys in this exact order
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6192,7 +6211,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6213,9 +6232,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6230,7 +6249,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6251,19 +6270,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6273,9 +6299,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6284,7 +6310,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6521,9 +6547,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6531,7 +6563,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6540,9 +6572,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6551,7 +6583,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6565,17 +6597,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6584,9 +6623,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6595,7 +6634,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..226b293622 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -190,6 +190,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..e302dfbdce 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,107 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+VACUUM btg;
+ANALYZE btg;
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d0442d7..7ef703f3a7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,57 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
 SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
+
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+CREATE INDEX ON btg(p, v);
+
+VACUUM btg;
+ANALYZE btg;
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+
+-- GROUP BY optimization by reorder columns by index scan
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
#23Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#21)
Re: POC: GROUP BY optimization

I.e. we already do reorder the group clauses to match ORDER BY, to only
require a single sort. This happens in preprocess_groupclause(), which
also explains the reasoning behind that.

Huh. I missed that. That means the group_keys_reorder_by_pathkeys() call inside
get_cheapest_group_keys_order() duplicates the work of preprocess_groupclause().
So it could be replaced by simply counting the number of matching pathkeys at
the beginning of the two lists. Or we could remove most of
preprocess_groupclause() - but it seems to me we should use the first option:
preprocess_groupclause() is simpler, since it operates only on SortGroupClause,
not on pathkeys.

BTW, the incremental sort patch provides pathkeys_common(), which is exactly what we need.

I wonder if some of the new code reordering group pathkeys could/should
be moved here (not sure, maybe it's too early for those decisions). In
any case, it might be appropriate to update some of the comments before
preprocess_groupclause() which claim we don't do certain things added by
the proposed patches.

preprocess_groupclause() is too early a step to use something like
group_keys_reorder_by_pathkeys(), because the pathkeys are not known yet at that point.

This probably also somewhat refutes my claim that the order of grouping
keys is currently fully determined by users (and so they may pick the
most efficient order), while the reorder-by-ndistinct patch would make
that impossible. Apparently when there's ORDER BY, we already mess with
the order of group clauses - there are ways to get around it (subquery
with OFFSET 0) but it's much less clear.

I like a moment when objections go away :)

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#24Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#23)
Re: POC: GROUP BY optimization

On 06/13/2018 06:56 PM, Teodor Sigaev wrote:

I.e. we already do reorder the group clauses to match ORDER BY, to only
require a single sort. This happens in preprocess_groupclause(), which
also explains the reasoning behind that.

Huh. I missed that. That means group_keys_reorder_by_pathkeys() call
inside get_cheapest_group_keys_order() duplicates work of
preprocess_groupclause().
And so it's possible to replace it to simple counting number of the
same pathkeys in beginning of lists. Or remove most part of
preprocess_groupclause() - but it seems to me we should use first
option, preprocess_groupclause() is simpler, it doesn't operate with
pathkeys only with SortGroupClause which is simpler.

BTW, incremental sort path provides pathkeys_common(), exactly what we
need.

I wonder if some of the new code reordering group pathkeys could/should
be moved here (not sure, maybe it's too early for those decisions). In
any case, it might be appropriate to update some of the comments before
preprocess_groupclause() which claim we don't do certain things added by
the proposed patches.

preprocess_groupclause() is too early step to use something like
group_keys_reorder_by_pathkeys() because pathkeys is not known here yet.

This probably also somewhat refutes my claim that the order of
grouping keys is currently fully determined by users (and so they
may pick the most efficient order), while the reorder-by-ndistinct
patch would make that impossible. Apparently when there's ORDER BY,
we already mess with the order of group clauses - there are ways to
get around it (subquery with OFFSET 0) but it's much less clear.

I like a moment when objections go away :)

I'm afraid that's a misunderstanding - my objections did not really go
away. I'm just saying my claim that we're not messing with order of
grouping keys is not entirely accurate, because we do that in one
particular case.

I still think we need to be careful when introducing new optimizations
in this area - reordering the grouping keys by ndistinct, ORDER BY or
whatever. In particular I don't think we should commit these patches
that may quite easily cause regressions, and then hope some hypothetical
future patch fixes the costing. Those objections still stand.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#25Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#24)
Re: POC: GROUP BY optimization

I'm afraid that's a misunderstanding - my objections did not really go
away. I'm just saying my claim that we're not messing with order of
grouping keys is not entirely accurate, because we do that in one
particular case.

I still think we need to be careful when introducing new optimizations
in this area - reordering the grouping keys by ndistinct, ORDER BY or
whatever. In particular I don't think we should commit these patches
that may quite easily cause regressions, and then hope some hypothetical
future patch fixes the costing. Those objections still stand.

Please have a look at https://commitfest.postgresql.org/18/1706/
There I tried to attack the cost_sort() issues, and I hope on that basis we can
solve the problems with the 0002 patch and improve the incremental sort patch.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#26Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#25)
Re: POC: GROUP BY optimization

On 06/28/2018 06:52 PM, Teodor Sigaev wrote:

I'm afraid that's a misunderstanding - my objections did not really go
away. I'm just saying my claim that we're not messing with order of
grouping keys is not entirely accurate, because we do that in one
particular case.

I still think we need to be careful when introducing new optimizations
in this area - reordering the grouping keys by ndistinct, ORDER BY or
whatever. In particular I don't think we should commit these patches
that may quite easily cause regressions, and then hope some hypothetical
future patch fixes the costing. Those objections still stand.

Pls, have a look at https://commitfest.postgresql.org/18/1706/
I tried to attack the cost_sort() issues and hope on that basis we can
solve problems with 0002 patch and improve incremental sort patch.

OK, will do. Thanks for working on this!

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#27Teodor Sigaev
teodor@sigaev.ru
In reply to: Tomas Vondra (#26)
Re: POC: GROUP BY optimization

I tried to attack the cost_sort() issues and hope on that basis we can solve
problems with 0002 patch and improve incremental sort patch.

OK, will do. Thanks for working on this!

I hope we now have a better cost_sort(). The obvious approach is to try every
combination of pathkeys in get_cheapest_group_keys_order() and choose the cheapest
one by cost_sort(). But that requires N! operations, which could be very
expensive for a large number of pathkeys, and it doesn't solve the issue of
user-knows-what-they-are-doing pathkeys. We could suggest an order of pathkeys as the patch
does now, and if the cost estimated by cost_sort() is less than 80% (arbitrarily
chosen) of the cost of the user-suggested pathkeys, use ours; otherwise use the user's pathkeys.

--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/

#28Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Teodor Sigaev (#27)
Re: POC: GROUP BY optimization

On 06/29/2018 04:51 PM, Teodor Sigaev wrote:

I tried to attack the cost_sort() issues and hope on that basis we
can solve problems with 0002 patch and improve incremental sort patch.

OK, will do. Thanks for working on this!

I hope, now we have a better cost_sort(). The obvious way is a try all
combination of pathkeys in get_cheapest_group_keys_order() and choose
cheapest one by cost_sort().

But it requires N! operations and potentially could be very
expensive in case of large number of pathkeys and doesn't solve the
issue with user-knows-what-he-does pathkeys.

Not sure. There are N! combinations, but this seems like a good
candidate for backtracking [1]. You don't have to enumerate and evaluate
all N! combinations; just construct one, then abandon whole classes
of combinations as soon as they get more expensive than the currently
best one. That's thanks to the additive nature of the comparison costing:
appending a column to the sort key can only make it more
expensive. My guess is this will make this a non-issue.

[1]: https://en.wikipedia.org/wiki/Backtracking

We could suggest an order of pathkeys as patch suggests now and if
cost_sort() estimates cost is less than 80% (arbitrary chosen) cost
of user-suggested pathkeys then it use our else user pathkeys.

I really despise such arbitrary thresholds. I'd much rather use a more
reliable heuristic by default, even if it gets it wrong in some cases
(which it will, but that's natural).

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#29Gavin Flower
GavinFlower@archidevsys.co.nz
In reply to: Tomas Vondra (#28)
Re: POC: GROUP BY optimization

On 30/06/18 03:03, Tomas Vondra wrote:

On 06/29/2018 04:51 PM, Teodor Sigaev wrote:

I tried to attack the cost_sort() issues and hope on that basis we
can solve problems with 0002 patch and improve incremental sort patch.

OK, will do. Thanks for working on this!

I hope, now we have a better cost_sort(). The obvious way is a try
all combination of pathkeys in get_cheapest_group_keys_order() and
choose cheapest one by cost_sort().

But it requires N! operations and potentially could be very
expensive in case of large number of pathkeys and doesn't solve the
issue with user-knows-what-he-does pathkeys.

Not sure. There are N! combinations, but this seems like a good
candidate for backtracking [1]. You don't have to enumerate and
evaluate all N! combinations, just construct one and then abandon
whole classes of combinations as soon as they get more expensive than
the currently best one. That's thanks to additive nature of the
comparison costing, because appending a column to the sort key can
only make it more expensive. My guess is this will make this a non-issue.

[1] https://en.wikipedia.org/wiki/Backtracking

We could suggest an order of pathkeys as patch suggests now and if
cost_sort() estimates cost is less than 80% (arbitrary chosen) cost
of user-suggested pathkeys then it use our else user pathkeys.

I really despise such arbitrary thresholds. I'd much rather use a more
reliable heuristics by default, even if it gets it wrong in some cases
(which it will, but that's natural).

regards

Additionally, put an upper threshold on the number of combinations
to check, fairly large by default?

If the first threshold is exceeded, we could consider checking a few more
paths selected at random from those not yet checked, to avoid any bias
caused by stopping a systematic search. This might prove important when
N! is fairly large.

Cheers,
Gavin

#30Michael Paquier
michael@paquier.xyz
In reply to: Gavin Flower (#29)
Re: POC: GROUP BY optimization

On Sat, Jun 30, 2018 at 09:19:13AM +1200, Gavin Flower wrote:

Additionally put an upper limit threshold on the number of combinations to
check, fairly large by default?

If first threshold is exceeded, could consider checking out a few more
selected at random from paths not yet checked, to avoid any bias caused by
stopping a systematic search.  This might prove important when N! is fairly
large.

Please note that the latest patch available does not apply, so this has
been moved to next CF 2018-11, waiting for input from its author.
--
Michael

#31Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Michael Paquier (#30)
Re: POC: GROUP BY optimization

On Tue, Oct 2, 2018 at 4:16 AM Michael Paquier <michael@paquier.xyz> wrote:

On Sat, Jun 30, 2018 at 09:19:13AM +1200, Gavin Flower wrote:

Additionally put an upper limit threshold on the number of combinations to
check, fairly large by default?

If first threshold is exceeded, could consider checking out a few more
selected at random from paths not yet checked, to avoid any bias caused by
stopping a systematic search. This might prove important when N! is fairly
large.

Please note that the latest patch available does not apply, so this has
been moved to next CF 2018-11, waiting for input from its author.

Unfortunately, the patch still has some conflicts; could you please rebase it?

#32Andres Freund
andres@anarazel.de
In reply to: Dmitry Dolgov (#31)
Re: POC: GROUP BY optimization

On 2018-11-29 17:56:53 +0100, Dmitry Dolgov wrote:

On Tue, Oct 2, 2018 at 4:16 AM Michael Paquier <michael@paquier.xyz> wrote:

On Sat, Jun 30, 2018 at 09:19:13AM +1200, Gavin Flower wrote:

Additionally put an upper limit threshold on the number of combinations to
check, fairly large by default?

If first threshold is exceeded, could consider checking out a few more
selected at random from paths not yet checked, to avoid any bias caused by
stopping a systematic search. This might prove important when N! is fairly
large.

Please note that the latest patch available does not apply, so this has
been moved to next CF 2018-11, waiting for input from its author.

Unfortunately, patch still has some conflicts, could you please rebase it?

As nothing has happened since, I'm marking this as returned with
feedback.

Greetings,

Andres Freund

#33Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Andres Freund (#32)
3 attachment(s)
Re: POC: GROUP BY optimization

On Thu, Jan 31, 2019 at 12:24 PM Andres Freund <andres@anarazel.de> wrote:

As nothing has happened since, I'm marking this as returned with
feedback.

This patch was on my radar for some time in the past, and we've seen use cases
where it could be pretty useful (probably even without the incremental sort
patch). I would like to make some progress here and see if it's possible to
continue its development. I've attached the rebased version with a few small
changes, e.g. I've created a separate patch with the group by reordering tests to
make it easier to see what changes were introduced, and after some experiments
removed the part that seems to duplicate "group by" reordering to follow "order
by". Also, it looks like it's possible to make these patches independent by having
a base patch with the isolated group_keys_reorder_by_pathkeys (they're
connected via n_preordered), but I haven't done this yet.

I went through the thread to summarize the objections that were mentioned so
far. Most of them are related to the third patch in the series, where
reordering based on "ndistinct" is implemented, and concern cost_sort (all
the possible problems that could happen without proper cost estimation due to
non-uniform distribution, different comparison costs and so on) and figuring
out how to limit the number of possible combinations of pathkeys to compare. I
haven't looked at the proposed backtracking approach, but taking into account
that the suggested patch for cost_sort [1] is RWF, I wonder what would be the best
strategy to proceed?

[1]: https://commitfest.postgresql.org/21/1706/

Attachments:

v11-0001-Add-tests-for-group-by-optimization.patch (application/octet-stream)
From f56be34b7405728b2e5f0b7f411c29c3eb002db0 Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Tue, 2 Apr 2019 17:49:18 +0200
Subject: [PATCH v11 1/3] Add tests for group by optimization

---
 src/test/regress/expected/aggregates.out | 232 +++++++++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      |  96 +++++++++++++
 2 files changed, 328 insertions(+)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 129c1e5075..a20c69469f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2195,6 +2195,238 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p
+   ->  Sort
+         Sort Key: v, p
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                      QUERY PLAN                      
+------------------------------------------------------
+ GroupAggregate
+   Group Key: v, p
+   ->  Sort
+         Sort Key: v, p
+         ->  Index Only Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, c, p, d
+   ->  Sort
+         Sort Key: v, c, p, d
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Seq Scan on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- test coverage for aggregate combine/serial/deserial functions
 BEGIN ISOLATION LEVEL REPEATABLE READ;
 SET parallel_setup_cost = 0;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index d4fd657188..ad73254f88 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -952,6 +952,102 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
 -- test coverage for aggregate combine/serial/deserial functions
 BEGIN ISOLATION LEVEL REPEATABLE READ;
 
-- 
2.16.4

Attachment: v11-0002-Reorder-to-match-ORDER-BY-or-index.patch (application/octet-stream)
From 60820aeaf67d2e32319dab94796164431ac23ad2 Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Fri, 29 Mar 2019 15:19:48 +0100
Subject: [PATCH v11 2/3] Reorder to match ORDER BY or index

---
 src/backend/optimizer/path/equivclass.c  | 13 ++++-
 src/backend/optimizer/path/pathkeys.c    | 91 ++++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c     | 85 +++++++++++++++++++++--------
 src/include/optimizer/paths.h            |  3 ++
 src/test/regress/expected/aggregates.out | 44 ++++++++-------
 5 files changed, 189 insertions(+), 47 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 61b5b119b0..7ffee2c165 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That is possible
+				 * if the ec was constructed from a WHERE clause, i.e. it
+				 * doesn't have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 56d839bb31..350b14b8c7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -25,6 +25,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -326,6 +327,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of pathkeys.  The
+ * function returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical and should use the same allocated struct,
+		 * so pointer equality is enough.
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of the pathkeys and clauses.
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1610,6 +1663,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we need not care about the actual
+ * order of the path's pathkeys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1624,6 +1710,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1659,6 +1748,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c430189572..4a27b0dc47 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6382,9 +6382,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Prevent group pathkey reordering; just check whether the
+				 * path's pathkeys match the group pathkeys in the same order.
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6392,7 +6411,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6413,9 +6432,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6430,7 +6449,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6451,19 +6470,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6473,9 +6499,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6484,7 +6510,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6721,9 +6747,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6731,7 +6763,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6740,9 +6772,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6751,7 +6783,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6765,17 +6797,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6784,9 +6823,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6795,7 +6834,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 36d12bc376..de0d076a1a 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -183,6 +183,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index a20c69469f..265c996d5e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2360,14 +2360,12 @@ SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p;
-                      QUERY PLAN                      
-------------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  GroupAggregate
-   Group Key: v, p
-   ->  Sort
-         Sort Key: v, p
-         ->  Index Only Scan using btg_p_v_idx on btg
-(5 rows)
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
@@ -2380,46 +2378,46 @@ SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p, c;
-         QUERY PLAN          
------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
-   Group Key: v, p, c
+   Group Key: p, v, c
    ->  Sort
-         Sort Key: v, p, c
-         ->  Seq Scan on btg
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
-         QUERY PLAN          
------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
    Group Key: p, v, c
    ->  Sort
          Sort Key: p, v, c
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, c, p, d;
-          QUERY PLAN          
-------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
-   Group Key: v, c, p, d
+   Group Key: p, v, c, d
    ->  Sort
-         Sort Key: v, c, p, d
-         ->  Seq Scan on btg
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
-          QUERY PLAN          
-------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
    Group Key: p, v, c, d
    ->  Sort
          Sort Key: p, v, c, d
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 RESET enable_hashagg;
-- 
2.16.4

Attachment: v11-0003-Reorder-by-values-distribution.patch (application/octet-stream)
From a591924bcd0eabb4559be07c6f5a2eb3930f222f Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Fri, 29 Mar 2019 15:20:22 +0100
Subject: [PATCH v11 3/3] Reorder by values distribution

---
 src/backend/optimizer/path/pathkeys.c    | 145 +++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c     |  33 ++++++-
 src/backend/utils/misc/guc.c             |  21 ++++-
 src/include/optimizer/paths.h            |  10 +++
 src/test/regress/expected/aggregates.out |  16 ++--
 src/test/regress/expected/stats_ext.out  |  20 ++---
 6 files changed, 225 insertions(+), 20 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 350b14b8c7..cee55a3feb 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -327,6 +327,11 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
 /*
  * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
  * returns new lists,  original GROUP BY lists stay untouched.
@@ -340,6 +345,9 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	ListCell	*key;
 	int			n;
 
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
 	if (pathkeys == NIL || *group_pathkeys == NIL)
 		return 0;
 
@@ -379,6 +387,143 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+/*
+ * Order the tail of the group pathkeys list by descending distinctness to
+ * speed up sorting.  Returns newly allocated lists; old ones stay untouched.
+ * n_preordered gives the length of the list head whose order is preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List *target_list,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	struct
+	{
+		PathKey			*pathkey;
+		SortGroupClause	*sgc;
+		Node			*pathkeyExpr;
+	}
+				   *keys, tmp;
+	int				nkeys = list_length(*group_pathkeys) - n_preordered;
+	List		   *pathkeyExprList = NIL,
+				   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL;
+	ListCell	   *cell;
+	int				i = 0, n_keys_to_est;
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Nothing to do here, since reordering of the group clauses to match
+	 * ORDER BY is already performed in preprocess_groupclause().
+	 */
+	if (n_preordered == 0 && root->sort_pathkeys)
+		return;
+
+	keys = palloc(nkeys * sizeof(*keys));
+
+	/*
+	 * Collect information about each pathkey for subsequent use.
+	 */
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		keys[i].pathkey = pathkey;
+		keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											  *group_clauses);
+		keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+													   target_list);
+		i++;
+	}
+
+	/*
+	 * Find the cheapest-to-sort order of columns: first pick the column with
+	 * the largest number of groups, then the best pair (the pair's first
+	 * column is already fixed by the first step), then the best triple, etc.
+	 */
+	for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+	{
+		ListCell   *tail_cell;
+		int			best_i = 0;
+		double		best_est_num_groups = -1;
+
+		/* expand the list of columns and remember its last cell */
+		pathkeyExprList = lappend(pathkeyExprList, NULL);
+		tail_cell = list_tail(pathkeyExprList);
+
+		/*
+		 * Find the best last column: "best" means the largest number of
+		 * groups, the previous columns being already chosen.
+		 */
+		for(i = n_keys_to_est - 1; i < nkeys; i++)
+		{
+			double  est_num_groups;
+
+			lfirst(tail_cell) = keys[i].pathkeyExpr;
+			est_num_groups = estimate_num_groups(root, pathkeyExprList,
+												 nrows, NULL);
+
+			if (est_num_groups > best_est_num_groups)
+			{
+				best_est_num_groups = est_num_groups;
+				best_i = i;
+			}
+		}
+
+		/* Save the best choice */
+		lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+		if (best_i != n_keys_to_est - 1)
+		{
+			tmp = keys[n_keys_to_est - 1];
+			keys[n_keys_to_est - 1] = keys[best_i];
+			keys[best_i] = tmp;
+		}
+	}
+	list_free(pathkeyExprList);
+
+	/*
+	 * Construct the result lists; the keys array is already ordered for the
+	 * cheapest sort.
+	 */
+	i = 0;
+	foreach(cell, *group_pathkeys)
+	{
+		PathKey	   *pathkey;
+		SortGroupClause *sgc;
+
+		if (i < n_preordered)
+		{
+			pathkey = (PathKey *) lfirst(cell);
+			sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+										  *group_clauses);
+		}
+		else
+		{
+			pathkey = keys[i - n_preordered].pathkey;
+			sgc = keys[i - n_preordered].sgc;
+		}
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 4a27b0dc47..dafa217cb3 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6384,7 +6384,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			bool		is_sorted;
 			List	   *group_pathkeys = root->group_pathkeys,
 					   *group_clauses = parse->groupClause;
-			int			n_preordered_groups;
+			int			n_preordered_groups = 0;
 
 			if (parse->groupingSets)
 			{
@@ -6408,11 +6408,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  extra->targetList,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6486,6 +6495,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
@@ -6760,11 +6775,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6811,11 +6834,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1766e46037..30e95e8cbb 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1949,7 +1949,26 @@ static struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
-
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Finds the cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index de0d076a1a..dd7ad7f330 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -186,6 +186,16 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List *target_list,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 265c996d5e..b285b6921e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2228,9 +2228,9 @@ SELECT count(*) FROM btg GROUP BY v, p;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: v, p
+   Group Key: p, v
    ->  Sort
-         Sort Key: v, p
+         Sort Key: p, v
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2239,9 +2239,9 @@ SELECT count(*) FROM btg GROUP BY v, p, c;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: v, p, c
+   Group Key: p, v, c
    ->  Sort
-         Sort Key: v, p, c
+         Sort Key: p, v, c
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2261,9 +2261,9 @@ SELECT count(*) FROM btg GROUP BY v, p, d, c;
           QUERY PLAN          
 ------------------------------
  GroupAggregate
-   Group Key: v, p, d, c
+   Group Key: p, v, d, c
    ->  Sort
-         Sort Key: v, p, d, c
+         Sort Key: p, v, d, c
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2318,9 +2318,9 @@ SELECT count(*) FROM btg GROUP BY p, d, e;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: p, d, e
+   Group Key: p, e, d
    ->  Sort
-         Sort Key: p, d, e
+         Sort Key: p, e, d
          ->  Seq Scan on btg
 (5 rows)
 
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index b32663459d..c59a0ddda5 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -249,9 +249,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: a, d, c, b
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: a, d, c, b
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -260,9 +260,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: b, c, d
+   Group Key: b, d, c
    ->  Sort
-         Sort Key: b, c, d
+         Sort Key: b, d, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -286,9 +286,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b
+   Group Key: b, a
    ->  Sort
-         Sort Key: a, b
+         Sort Key: b, a
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -297,9 +297,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c
+   Group Key: b, a, c
    ->  Sort
-         Sort Key: a, b, c
+         Sort Key: b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
@@ -308,9 +308,9 @@ EXPLAIN (COSTS off)
             QUERY PLAN             
 -----------------------------------
  GroupAggregate
-   Group Key: a, b, c, d
+   Group Key: d, b, a, c
    ->  Sort
-         Sort Key: a, b, c, d
+         Sort Key: d, b, a, c
          ->  Seq Scan on ndistinct
 (5 rows)
 
-- 
2.16.4

#34Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#33)
Re: POC: GROUP BY optimization

On Thu, Apr 04, 2019 at 05:11:09PM +0200, Dmitry Dolgov wrote:

On Thu, Jan 31, 2019 at 12:24 PM Andres Freund <andres@anarazel.de> wrote:

As nothing has happened since, I'm marking this as returned with
feedback.

This patch was on my radar for some time in the past and we've seen use cases
where it could be pretty useful (probably even without the incremental sort
patch). I would like to make some progress here and see if it's possible to
continue its development. I've attached the rebased version with some small
changes, e.g. I've created a separate patch with the group by reordering tests to
make it easy to see what changes were introduced, and after some experiments
removed the part that seems to duplicate "group by" reordering to follow "order
by". It also looks like it's possible to make these patches independent by having
a base patch with the isolated group_keys_reorder_by_pathkeys (they're
connected via n_preordered), but I haven't done this yet.

I went through the thread to summarize the objections that were mentioned so
far. Most of them are related to the third patch in the series, where
reordering based on "ndistinct" is implemented, and are about cost_sort (all
the possible problems that could happen without proper cost estimation due to
non-uniform distribution, different comparison costs and so on) and figuring
out how to limit the number of possible combinations of pathkeys to compare. I
haven't looked at the proposed backtracking approach, but taking into account
that the suggested patch for cost_sort [1] is RWF, I wonder what would be the
best strategy to proceed?

[1]: https://commitfest.postgresql.org/21/1706/
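
The combinatorial worry about comparing pathkey orders can be illustrated with a toy sketch (plain Python, nothing from the patch itself): instead of enumerating all N! column orders, a greedy heuristic simply sorts the columns by their estimated number of distinct values in descending order. The `ndistinct` mapping here is a hypothetical stand-in for what estimate_num_groups() would provide:

```python
# A hedged illustration, not code from the patch: greedily order GROUP BY
# columns by descending estimated number of distinct values, so the sort
# comparator can resolve most comparisons on the first key, without ever
# enumerating all N! permutations of the columns.

def greedy_key_order(ndistinct):
    """ndistinct: dict mapping column name -> estimated distinct values."""
    return sorted(ndistinct, key=lambda col: ndistinct[col], reverse=True)

# Rough per-column estimates in the spirit of the btg test table.
print(greedy_key_order({'v': 2, 'p': 5000, 'c': 2500, 'd': 1250}))
# ['p', 'c', 'd', 'v']
```

A greedy pass like this sidesteps the factorial search space, at the price of ignoring interactions between columns (correlations, comparison costs), which is exactly where the costing objections above come in.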

Dunno. It seems the progress on the sort-related patches was rather limited
in the PG12 cycle in general :-( There's the Incremental Sort patch, GROUP
BY optimization and then the cost_sort patch.

Not sure about the best strategy, though. One obvious option is to rely on
the cost_sort patch to do all the improvements needed for the other patches,
but that assumes that patch moves reasonably fast.

So I personally would suggest to treat those patches as independent until
the very last moment, develop the costing improvements needed by each
of them, and then decide which of them are committable / in what order.

At the end of PG11 cycle I've offered my help with testing / reviewing
those patches, if there is progress. That still holds, if there are new
patch versions I'll look at them.

cheers

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#35Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Tomas Vondra (#34)
3 attachment(s)
Re: POC: GROUP BY optimization

On Tue, Apr 9, 2019 at 5:21 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

So I personally would suggest to treat those patches as independent until
the very last moment, develop the costing improvements needed by each
of them, and then decide which of them are committable / in what order.

I had the same idea, but judging from the questions raised in this thread,
it's quite hard to go with reordering based only on the frequency of values. I
hoped that the cost_sort improvement patch would be simple enough to
incorporate here, but of course it wasn't. Under the assumption that the
amount of work required for sorting depends only on the number of
distinct groups and on how costly it is to compare values of the data type,
I've ended up extracting the get_width_multiplier and get_func_cost parts from
the cost_sort patch and including them into 0003-Reorder-by-values-distribution.
This allows taking into account situations when we compare e.g. long strings
or a custom data type with a high procost for comparison (but I've used these
values directly, without any adjusting coefficients yet).
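
The assumption above — that sort work is driven by the number of distinct groups and the per-datatype comparison cost — can be sanity-checked with a small simulation (plain Python, not PostgreSQL code): count how many per-column comparisons a multi-key sort performs for two orderings of the same keys. The column names `p` and `v` mirror the btg test table, where `p` is nearly unique and `v` has only two values:

```python
import random
from functools import cmp_to_key

# Toy simulation: count the per-column comparisons a multi-key sort makes
# for two orderings of the same GROUP BY columns. When the nearly-unique
# key 'p' comes first, the comparator almost never has to fall through to
# the low-cardinality key 'v'.

def count_column_comparisons(rows, key_order):
    calls = [0]
    def cmp(a, b):
        for k in key_order:
            calls[0] += 1              # one per-column comparison
            if a[k] != b[k]:
                return -1 if a[k] < b[k] else 1
        return 0
    sorted(rows, key=cmp_to_key(cmp))
    return calls[0]

random.seed(0)
rows = [{'p': i, 'v': i % 2} for i in range(10000)]
random.shuffle(rows)

# Putting the high-cardinality column first does strictly less work.
assert count_column_comparisons(rows, ['p', 'v']) < \
       count_column_comparisons(rows, ['v', 'p'])
```

Weighting each column comparison by a width multiplier and procost, instead of counting them all as 1 as above, is exactly the refinement the extracted cost_sort parts aim at.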

On Wed, Jun 13, 2018 at 6:41 PM Teodor Sigaev <teodor@sigaev.ru> wrote:

So that's a nice improvement, although I think we should also consider
non-uniform distributions (using the per-column MCV lists).

Could you clarify how to do that?

Since I'm not familiar with this topic, I would like to ask the same question,
how to do that and what are the advantages?

On Sat, Jun 16, 2018 at 5:59 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

I still think we need to be careful when introducing new optimizations
in this area - reordering the grouping keys by ndistinct, ORDER BY or
whatever. In particular I don't think we should commit these patches
that may quite easily cause regressions, and then hope some hypothetical
future patch fixes the costing.

I'm a bit concerned about this part of the discussion. There is an idea running
through the whole thread about avoiding the situation where a user knows which
order is better and we generate a different one by mistake. From what I see,
even if all the objections were addressed, there is a chance that some
coefficients will not be good enough (e.g. the width multiplier is based on an
average width, or it can suddenly happen that all the compared strings have
some difference at the very beginning) and the chosen order will not be
optimal. Does that mean that in any case the implementation of such an
optimization should provide a way to override it?

Attachments:

v12-0001-Add-tests-for-group-by-optimization.patch
From 35f3227677f715b0ec76b583c109e62d262d1f42 Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Tue, 2 Apr 2019 17:49:18 +0200
Subject: [PATCH v12 1/3] Add tests for group by optimization

---
 src/test/regress/expected/aggregates.out | 232 +++++++++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      |  96 +++++++++++++
 2 files changed, 328 insertions(+)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 129c1e5075..a20c69469f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2195,6 +2195,238 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p
+   ->  Sort
+         Sort Key: v, p
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                      QUERY PLAN                      
+------------------------------------------------------
+ GroupAggregate
+   Group Key: v, p
+   ->  Sort
+         Sort Key: v, p
+         ->  Index Only Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, c, p, d
+   ->  Sort
+         Sort Key: v, c, p, d
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Seq Scan on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- test coverage for aggregate combine/serial/deserial functions
 BEGIN ISOLATION LEVEL REPEATABLE READ;
 SET parallel_setup_cost = 0;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index d4fd657188..ad73254f88 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -952,6 +952,102 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
 -- test coverage for aggregate combine/serial/deserial functions
 BEGIN ISOLATION LEVEL REPEATABLE READ;
 
-- 
2.16.4

v12-0002-Reorder-to-match-ORDER-BY-or-index.patch
From fec5fc5a1e0e8a6c6c800e948ecab053fa67faf2 Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Fri, 29 Mar 2019 15:19:48 +0100
Subject: [PATCH v12 2/3] Reorder to match ORDER BY or index

---
 src/backend/optimizer/path/equivclass.c  | 13 ++++-
 src/backend/optimizer/path/pathkeys.c    | 91 ++++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c     | 85 +++++++++++++++++++++--------
 src/include/optimizer/paths.h            |  3 ++
 src/test/regress/expected/aggregates.out | 44 ++++++++-------
 5 files changed, 189 insertions(+), 47 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 61b5b119b0..7ffee2c165 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet; this is possible if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 68b2204b3b..65e53ef854 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -27,6 +27,7 @@
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -331,6 +332,58 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the order of pathkeys. The
+ * function returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys should use the same allocated struct, so equality of
+		 * pointers is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1774,6 +1827,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered, so we don't bother about actual order in
+ * pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1788,6 +1874,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1823,6 +1912,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */ 
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 0a6710c73b..247e39d6ff 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6387,9 +6387,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Prevent group pathkey reordering; just check whether the
+				 * path's pathkeys and the group pathkeys are in the same order
+				 */
+				is_sorted = pathkeys_contained_in(group_pathkeys,
+												  path->pathkeys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
@@ -6397,7 +6416,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6418,9 +6437,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6435,7 +6454,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6456,19 +6475,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
+
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
 				 * sorting anything but the cheapest path.
 				 */
-				if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+				if (n_preordered_groups != list_length(group_pathkeys))
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6478,9 +6504,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6489,7 +6515,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 			}
@@ -6726,9 +6752,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
@@ -6736,7 +6768,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6745,9 +6777,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6756,7 +6788,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -6770,17 +6802,24 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys));
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -6789,9 +6828,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6800,7 +6839,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0e858097c8..faf6449f4d 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -183,6 +183,9 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index a20c69469f..265c996d5e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2360,14 +2360,12 @@ SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p;
-                      QUERY PLAN                      
-------------------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  GroupAggregate
-   Group Key: v, p
-   ->  Sort
-         Sort Key: v, p
-         ->  Index Only Scan using btg_p_v_idx on btg
-(5 rows)
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
@@ -2380,46 +2378,46 @@ SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p, c;
-         QUERY PLAN          
------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
-   Group Key: v, p, c
+   Group Key: p, v, c
    ->  Sort
-         Sort Key: v, p, c
-         ->  Seq Scan on btg
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
-         QUERY PLAN          
------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
    Group Key: p, v, c
    ->  Sort
          Sort Key: p, v, c
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, c, p, d;
-          QUERY PLAN          
-------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
-   Group Key: v, c, p, d
+   Group Key: p, v, c, d
    ->  Sort
-         Sort Key: v, c, p, d
-         ->  Seq Scan on btg
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 EXPLAIN (COSTS off)
 SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
-          QUERY PLAN          
-------------------------------
+                   QUERY PLAN                    
+-------------------------------------------------
  GroupAggregate
    Group Key: p, v, c, d
    ->  Sort
          Sort Key: p, v, c, d
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_p_v_idx on btg
 (5 rows)
 
 RESET enable_hashagg;
-- 
2.16.4

Attachment: v12-0003-Reorder-by-values-distribution.patch (application/octet-stream)
From 59d6fa8fd1dbccd60cf4a72663157fc531dbd5f9 Mon Sep 17 00:00:00 2001
From: erthalion <9erthalion6@gmail.com>
Date: Fri, 29 Mar 2019 15:20:22 +0100
Subject: [PATCH v12 3/3] Reorder by values distribution

---
 src/backend/optimizer/path/pathkeys.c    | 218 +++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c     |  33 ++++-
 src/backend/utils/misc/guc.c             |  21 ++-
 src/include/optimizer/paths.h            |  10 ++
 src/test/regress/expected/aggregates.out |  16 +--
 5 files changed, 288 insertions(+), 10 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 65e53ef854..ac79bc4975 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -24,6 +24,7 @@
 #include "nodes/plannodes.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
+#include "optimizer/plancat.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
@@ -332,6 +333,11 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
 /*
  * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function
  * returns new lists,  original GROUP BY lists stay untouched.
@@ -345,6 +351,9 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	ListCell	*key;
 	int			n;
 
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
 	if (pathkeys == NIL || *group_pathkeys == NIL)
 		return 0;
 
@@ -384,6 +393,215 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+/*
+ * get_width_multiplier
+ *
+ *	Returns relative complexity of comparing two values based on their width.
+ *	The idea behind this is that longer values are more expensive to compare.
+ */
+static double
+get_width_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0;
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stats in the corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use an estimate based on the type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Every value in PostgreSQL is passed around as a Datum, so no operation
+	 * on a value can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return sizeof(Datum);
+
+	return width;
+}
+
+/*
+ * Order the tail of the group pathkeys list by descending uniqueness, which
+ * speeds up sorting. Returns newly allocated lists; the old ones stay
+ * untouched. n_preordered defines the length of the list head whose order
+ * must be preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List *target_list,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	struct
+	{
+		PathKey			*pathkey;
+		SortGroupClause	*sgc;
+		Node			*pathkeyExpr;
+	}
+				   *keys, tmp;
+	int				nkeys = list_length(*group_pathkeys) - n_preordered;
+	List		   *pathkeyExprList = NIL,
+				   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL;
+	ListCell	   *cell;
+	int				i = 0, n_keys_to_est;
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	if (nkeys < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Nothing to do here, since reordering of group clauses to match ORDER BY
+	 * was already performed in preprocess_groupclause.
+	 */
+	if (n_preordered == 0 && root->sort_pathkeys)
+		return;
+
+	keys = palloc(nkeys * sizeof(*keys));
+
+	/*
+	 * Collect information about each pathkey for subsequent use.
+	 */
+	for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		keys[i].pathkey = pathkey;
+		keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											  *group_clauses);
+		keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+													   target_list);
+		i++;
+	}
+
+	/*
+	 * Find the cheapest order of columns to sort. First find the single
+	 * column with the largest number of groups, then the best pair (the first
+	 * column of the pair is already fixed by the previous step), then the
+	 * best triple, and so on.
+	 */
+	for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+	{
+		ListCell   *tail_cell;
+		int			best_i = 0;
+		double		best_est_num_groups = -1;
+
+		/* expand the list of columns and remember its last cell */
+		pathkeyExprList = lappend(pathkeyExprList, NULL);
+		tail_cell = list_tail(pathkeyExprList);
+
+		/*
+		 * Find the best last column, where "best" means the largest number of
+		 * groups; the previous columns are already chosen.
+		 */
+		for(i = n_keys_to_est - 1; i < nkeys; i++)
+		{
+			double  est_num_groups;
+			Expr *expr = (Expr *) keys[i].pathkeyExpr;
+			PathKey *pathkey = keys[i].pathkey;
+			EquivalenceMember *em = (EquivalenceMember *)
+									linitial(pathkey->pk_eclass->ec_members);
+
+			lfirst(tail_cell) = keys[i].pathkeyExpr;
+			est_num_groups = estimate_num_groups(root, pathkeyExprList,
+												 nrows, NULL);
+			est_num_groups /= get_width_multiplier(root, expr);
+
+			if (em->em_datatype != InvalidOid)
+			{
+				Oid			sortop;
+				QualCost	costs;
+				costs.startup = costs.per_tuple = 0;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+				add_function_cost(root, get_opcode(sortop), NULL, &costs);
+
+				est_num_groups /= costs.per_tuple;
+			}
+
+			if (est_num_groups > best_est_num_groups)
+			{
+				best_est_num_groups = est_num_groups;
+				best_i = i;
+			}
+		}
+
+		/* Save the best choice */
+		lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+		if (best_i != n_keys_to_est - 1)
+		{
+			tmp = keys[n_keys_to_est - 1];
+			keys[n_keys_to_est - 1] = keys[best_i];
+			keys[best_i] = tmp;
+		}
+	}
+	list_free(pathkeyExprList);
+
+	/*
+	 * Construct the result lists; the keys array is already ordered to give
+	 * the cheapest sort.
+	 */
+	i = 0;
+	foreach(cell, *group_pathkeys)
+	{
+		PathKey	   *pathkey;
+		SortGroupClause *sgc;
+
+		if (i < n_preordered)
+		{
+			pathkey = (PathKey *) lfirst(cell);
+			sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+										  *group_clauses);
+		}
+		else
+		{
+			pathkey = keys[i - n_preordered].pathkey;
+			sgc = keys[i - n_preordered].sgc;
+		}
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+
+		i++;
+	}
+
+	pfree(keys);
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 247e39d6ff..d480551d3c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6389,7 +6389,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			bool		is_sorted;
 			List	   *group_pathkeys = root->group_pathkeys,
 					   *group_clauses = parse->groupClause;
-			int			n_preordered_groups;
+			int			n_preordered_groups = 0;
 
 			if (parse->groupingSets)
 			{
@@ -6413,11 +6413,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  extra->targetList,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6491,6 +6500,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
@@ -6765,11 +6780,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6816,11 +6839,19 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  extra->targetList,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
 													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f7f726b5ae..f13ea2ab5d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1952,7 +1952,26 @@ static struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
-
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables reordering of GROUP BY keys to match pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables searching for the cheapest order of GROUP BY columns."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index faf6449f4d..fe4c067369 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -186,6 +186,16 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List *target_list,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 265c996d5e..9afc1a827d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2228,9 +2228,9 @@ SELECT count(*) FROM btg GROUP BY v, p;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: v, p
+   Group Key: p, v
    ->  Sort
-         Sort Key: v, p
+         Sort Key: p, v
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2239,9 +2239,9 @@ SELECT count(*) FROM btg GROUP BY v, p, c;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: v, p, c
+   Group Key: p, c, v
    ->  Sort
-         Sort Key: v, p, c
+         Sort Key: p, c, v
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2261,9 +2261,9 @@ SELECT count(*) FROM btg GROUP BY v, p, d, c;
           QUERY PLAN          
 ------------------------------
  GroupAggregate
-   Group Key: v, p, d, c
+   Group Key: p, d, c, v
    ->  Sort
-         Sort Key: v, p, d, c
+         Sort Key: p, d, c, v
          ->  Seq Scan on btg
 (5 rows)
 
@@ -2318,9 +2318,9 @@ SELECT count(*) FROM btg GROUP BY p, d, e;
          QUERY PLAN          
 -----------------------------
  GroupAggregate
-   Group Key: p, d, e
+   Group Key: p, e, d
    ->  Sort
-         Sort Key: p, d, e
+         Sort Key: p, e, d
          ->  Seq Scan on btg
 (5 rows)
 
-- 
2.16.4

#36 Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#35)
Re: POC: GROUP BY optimization

On Fri, May 03, 2019 at 10:28:21PM +0200, Dmitry Dolgov wrote:

On Tue, Apr 9, 2019 at 5:21 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

So I personally would suggest to treat those patches as independent until
the very last moment, develop the costing improvements needed by each
of them, and then decide which of them are committable / in what order.

I had the same idea, but judging from the questions raised in this thread, it's
quite hard to go with reordering based only on the frequency of values. I hoped
that the cost_sort improvement patch would be simple enough to incorporate
here, but of course it wasn't. On the assumption that the amount of work
required for sorting depends only on the number of distinct groups and on how
costly it is to compare values of the data type, I've ended up extracting the
get_width_multiplier and get_func_cost parts from the cost_sort patch and
including them in 0003-Reorder-by-values-distribution. This makes it possible
to take into account situations where we compare e.g. long strings or a custom
data type with a high procost for comparison (though I've used these values
directly, without any adjusting coefficients, yet).

On Wed, Jun 13, 2018 at 6:41 PM Teodor Sigaev <teodor@sigaev.ru> wrote:

So that's a nice improvement, although I think we should also consider
non-uniform distributions (using the per-column MCV lists).

Could you clarify how to do that?

Since I'm not familiar with this topic, I would like to ask the same question:
how to do that, and what are the advantages?

I don't recall the exact details of the discussion, since most of it
happened almost a year ago, but the main concern about the original
costing approach is that it very much assumes uniform distribution.

For example if you have N tuples with M groups (which are not computed
using estimate_num_groups IIRC, and we can hardly do much better), then
the patch assumed each group has ~N/M rows and used that for computing
the number of comparisons for a particular sequence of ORDER BY clauses.

But that may result in pretty significant errors in cases with a couple
of very large groups. But hey - we can get some of that information from
MCV lists, so maybe we can compute a safer, less optimistic estimate.

I mean, we can't compute a "perfect" estimate of how many comparisons
we'll have to do, because we only have per-column MCV lists and maybe
some multi-column MCV lists (but definitely not on all combinations of
columns in the ORDER BY clause).

But what I think we could do is using largest possible group instead of
the average one. So for example when estimating number of comparisons
for columns (c1,..,cN), you could look at MCV lists for these columns
and compute

m(i) = Max(largest group in MCV list for i-th column)

and then use Min(m(1), ..., m(k)) when estimating the number of
comparisons.

Of course, this is likely to be a worst-case estimate, and it's not
quite obvious that comparing worst-case estimates is necessarily safer
than comparing the regular ones. So I'm not sure about it.

It might be better to just compute the 'average group' in a different
way, not by the arithmetic mean, but say as a geometric mean over each MCV
list. Not sure.

I guess this needs some research and experimentation - constructing
cases that are likely to cause problems (non-uniform distributions,
columns with a small number of exceptionally large groups, ...) and then
showing how the costing deals with those.

FWIW I've mentioned MCV lists in the incremental sort thread too, in
which case it was much clearer how to use them to improve the startup
cost estimate - it's enough to look at the first group in per-column MCV
lists (first in the ORDER BY sense, not by frequency).

On Sat, Jun 16, 2018 at 5:59 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

I still think we need to be careful when introducing new optimizations
in this area - reordering the grouping keys by ndistinct, ORDER BY or
whatever. In particular I don't think we should commit these patches
that may quite easily cause regressions, and then hope some hypothetical
future patch fixes the costing.

I'm a bit concerned about this part of the discussion. There is an idea running
through the whole thread about avoiding the situation where a user knows which
order is better and we generate a different one by mistake. From what I see
right now, even if all the objections were addressed, there is a chance that
some coefficients will not be good enough (e.g. the width multiplier is based
on an average width, or it can suddenly happen that all the compared strings
differ at the very beginning) and the chosen order will not be optimal. Does it
mean that in any case the implementation of such an optimization should provide
a way to override it?

I don't think we have to provide an override no matter what. Otherwise
we'd have to have an override for everything, because no optimizer
decision is perfect - it's all built on estimates, after all.

But I assume we agree that optimizations based on estimates that are
known to be unreliable are bound to be unreliable too. And optimizations
that regularly misfire for common data sets may easily do more harm than
good (and maybe should not be called optimizations at all).

For example, the first patch relied on ndistinct estimates quite a bit
and used them to compute multi-column estimates, which we already know
is rather poor for GROUP BY estimates. The v9 uses estimate_num_groups()
which works better because it leverages extended stats, when available.
That's good, but we need to see how the rest of the costing works.

So I think it very much depends on how reliable the costing can be made,
and how bad consequences a poor choice can have.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#37 Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Tomas Vondra (#36)
Re: POC: GROUP BY optimization

On Fri, May 3, 2019 at 11:55 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

I don't recall the exact details of the discussion, since most of it
happened almost a year ago, but the main concern about the original
costing approach is that it very much assumes uniform distribution.

For example if you have N tuples with M groups (which are not computed
using estimate_num_groups IIRC, and we can hardly do much better), then
the patch assumed each group has ~N/M rows and used that for computing
the number of comparisons for a particular sequence of ORDER BY clauses.

But that may result in pretty significant errors in cases with a couple
of very large groups.

Yes, you're right, the current version of the patch assumes a uniform
distribution of values between these M groups. After some thinking I've got the
idea that maybe it's better not to try to find out what would be acceptable for
both uniform and non-uniform distributions, but simply not reorder at all if
there are any significant deviations from what seems to be the "best case",
namely when values are distributed among groups relatively uniformly and there
is no doubt about how complicated they are to compare.

That said, on top of the current implementation it's possible to check:

* the dependency coefficient between columns (if I understand correctly, a
non-uniform distribution of values between the groups, i.e. a few large groups,
should be visible in extended statistics as a significant dependency)

* that the largest/smallest group in the MCV list doesn't differ too much from
an expected "average" group

* that the average width and comparison procost are similar

If everything fits (which I hope would be true most of the time) - apply the
reordering, otherwise use whatever order was specified (which leaves room for
manual reordering in relatively rare situations). Does that make sense?
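For illustration only, the three checks could be folded into a single gate along these lines (all names and threshold values here are invented, not part of the patch):

```c
#include <stdbool.h>

/* Illustrative only: names and thresholds are invented, not PostgreSQL code.
 * Reorder only when the distribution looks close to the uniform "best case". */
static bool
safe_to_reorder(double dep_coefficient,	/* extended-stats dependency */
				double max_mcv_group,	/* largest MCV group size */
				double avg_group,		/* ntuples / ndistinct */
				double width_ratio,		/* max/min average column width */
				double procost_ratio)	/* max/min comparison procost */
{
	return dep_coefficient < 0.5 &&
		max_mcv_group < 2.0 * avg_group &&
		width_ratio < 2.0 &&
		procost_ratio < 2.0;
}
```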

But what I think we could do is using largest possible group instead of
the average one. So for example when estimating number of comparisons
for columns (c1,..,cN), you could look at MCV lists for these columns
and compute

m(i) = Max(largest group in MCV list for i-th column)

and then use Min(m(1), ..., m(k)) when estimating the number of
comparisons.

I see the idea, but I'm a bit confused about how to get the largest group from
an MCV list. I mean, there is a list of the most common values per column with
frequencies, and similar stuff for multi-column statistics, but how do we get
the size of those groups?

#38 Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#37)
Re: POC: GROUP BY optimization

On Fri, May 24, 2019 at 02:10:48PM +0200, Dmitry Dolgov wrote:

On Fri, May 3, 2019 at 11:55 PM Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

I don't recall the exact details of the discussion, since most of it
happened almost a year ago, but the main concern about the original
costing approach is that it very much assumes uniform distribution.

For example if you have N tuples with M groups (which are not computed
using estimate_num_groups IIRC, and we can hardly do much better), then
the patch assumed each group has ~N/M rows and used that for computing
the number of comparisons for a particular sequence of ORDER BY clauses.

But that may result in pretty significant errors in cases with a couple
of very large groups.

Yes, you're right, the current version of the patch assumes a uniform
distribution of values between these M groups. After some thinking I've got the
idea that maybe it's better not to try to find out what would be acceptable for
both uniform and non-uniform distributions, but simply not reorder at all if
there are any significant deviations from what seems to be the "best case",
namely when values are distributed among groups relatively uniformly and there
is no doubt about how complicated they are to compare.

That said, on top of the current implementation it's possible to check:

* the dependency coefficient between columns (if I understand correctly, a
non-uniform distribution of values between the groups, i.e. a few large groups,
should be visible in extended statistics as a significant dependency)

* that the largest/smallest group in the MCV list doesn't differ too much from
an expected "average" group

* that the average width and comparison procost are similar

If everything fits (which I hope would be true most of the time) - apply the
reordering, otherwise use whatever order was specified (which leaves room for
manual reordering in relatively rare situations). Does that make sense?

I think those are interesting sources of information. I don't know if they
will be sufficient, but I think it's worth exploring.

One of the issues with this approach however will be selecting the
threshold values. That is, how do you decide whether a given
functional dependency is "too strong", or whether the largest/smallest MCV item
differs too much from the average one?

If you pick a particular threshold value, there'll be a sudden behavior
change at some point, and that's very annoying. For example, you might
pick 0.5 as a threshold for "strong" functional dependencies. There's
little reason why 0.4999 should be weak and 0.5001 should be "strong".

This may lead to sudden behavior changes after ANALYZE, for example.

So I think we need to find a way to make this part of the costing model,
which is how we deal with such cases elsewhere.
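The discontinuity is easy to see in a minimal sketch (purely illustrative functions, not proposed code): a hard threshold flips the decision between 0.4999 and 0.5001, while a costing-based factor degrades smoothly as the dependency grows stronger.

```c
/* Purely illustrative, not PostgreSQL code: a hard gate vs. a smooth factor. */
static double
hard_gate(double dependency)
{
	/* reordering abruptly disabled at the threshold */
	return (dependency >= 0.5) ? 0.0 : 1.0;
}

static double
smooth_factor(double dependency)
{
	/* scale the estimated benefit continuously instead */
	return 1.0 - dependency;
}
```

After an ANALYZE that nudges the dependency across 0.5, the hard gate changes the plan, while the smooth factor changes the cost estimate only marginally.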

But what I think we could do is using largest possible group instead of
the average one. So for example when estimating number of comparisons
for columns (c1,..,cN), you could look at MCV lists for these columns
and compute

m(i) = Max(largest group in MCV list for i-th column)

and then use Min(m(1), ..., m(k)) when estimating the number of
comparisons.

I see the idea, but I'm a bit confused about how to get the largest group from
an MCV list. I mean, there is a list of the most common values per column with
frequencies, and similar stuff for multi-column statistics, but how do we get
the size of those groups?

You can just multiply the frequency by the number of rows in the table;
that gives you the size of the group. Or rather an estimate of it, because
there might be a filter condition involved.
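Putting the two pieces together - frequency times row count gives the per-column group sizes, and the Min over the per-column maxima bounds the combined group - a sketch could look like this (illustrative names, not PostgreSQL APIs):

```c
/* Hedged sketch of the worst-case idea above: for each column take its
 * largest MCV group (frequency * ntuples), then take the minimum across
 * columns -- a group of the combined key can never be larger than the
 * smallest per-column maximum. */
static double
worst_case_group_size(const double **mcv_freqs, const int *nfreqs,
					  int ncols, double ntuples)
{
	double		result = ntuples;	/* fallback bound if no MCV list helps */

	for (int c = 0; c < ncols; c++)
	{
		double		m = 0.0;		/* m(c): largest group for column c */

		for (int i = 0; i < nfreqs[c]; i++)
			if (mcv_freqs[c][i] * ntuples > m)
				m = mcv_freqs[c][i] * ntuples;

		if (m > 0.0 && m < result)
			result = m;				/* Min(m(1), ..., m(k)) */
	}
	return result;
}
```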

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#39 Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Tomas Vondra (#38)
Re: POC: GROUP BY optimization

Hi,

I wonder if anyone has plans to try again with this optimization in v14
cycle? The patches no longer apply thanks to the incremental sort patch,
but I suppose fixing that should not be extremely hard.

The 2020-07 CF is still a couple weeks away, but it'd be good to know if
there are any plans to revive this. I'm willing to spend some time on
reviewing / testing this, etc.

I've only quickly skimmed the old thread, but IIRC there were two main
challenges in getting the optimization right:

1) deciding which orderings are interesting / worth additional work

I think we need to consider these orderings, in addition to the one
specified in GROUP BY:

1) as specified in ORDER BY (if different from 1)

2) one with cheapest sort on unsorted input (depending on number of
distinct values, cost of comparisons, etc.)

3) one with cheapest sort on partially sorted input (essentially what we
do with the incremental sort paths, but matching the pathkeys in a more
elaborate way)

2) costing the alternative orderings

I think we've already discussed various ways to leverage as much
available info as possible (extended stats, MCVs, ...) but I think the
patch only does some of it.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#40 Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Tomas Vondra (#39)
Re: POC: GROUP BY optimization

On Fri, May 15, 2020 at 01:52:20AM +0200, Tomas Vondra wrote:

I wonder if anyone has plans to try again with this optimization in v14
cycle? The patches no longer apply thanks to the incremental sort patch,
but I suppose fixing that should not be extremely hard.

The 2020-07 CF is still a couple weeks away, but it'd be good to know if
there are any plans to revive this. I'm willing to spend some time on
reviewing / testing this, etc.

Yes, if you believe that this patch has potential, I would love to pick
it up again.

I've only quickly skimmed the old thread, but IIRC there were two main
challenges in getting the optimization right:

1) deciding which orderings are interesting / worth additional work

I think we need to consider these orderings, in addition to the one
specified in GROUP BY:

1) as specified in ORDER BY (if different from 1)

What is the idea behind considering this ordering?

#41 Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#40)
1 attachment(s)
Re: POC: GROUP BY optimization

On Sat, May 16, 2020 at 02:24:31PM +0200, Dmitry Dolgov wrote:

On Fri, May 15, 2020 at 01:52:20AM +0200, Tomas Vondra wrote:

I wonder if anyone has plans to try again with this optimization in v14
cycle? The patches no longer apply thanks to the incremental sort patch,
but I suppose fixing that should not be extremely hard.

The 2020-07 CF is still a couple weeks away, but it'd be good to know if
there are any plans to revive this. I'm willing to spend some time on
reviewing / testing this, etc.

Yes, if you believe that this patch has potential, I would love to pick
it up again.

I think it's worth another try. I've been reminded of this limitation
when working on the incremental sort, which significantly increases the
number of orderings that we can sort efficiently. But we can't possibly
leverage that unless it matches the GROUP BY ordering.

The attached WIP patch somewhat addresses this by trying to reorder the
group_pathkeys so as to allow leveraging an existing ordering (even just
a partial one, with incremental sort).
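The matching idea can be sketched with plain integers standing in for pathkeys (a simplification of what group_keys_reorder_by_pathkeys does in the earlier patches; the real code walks PathKey and SortGroupClause lists):

```c
#include <stdbool.h>

/* Simplified sketch of the reordering idea, with ints standing in for
 * pathkeys: walk the input ordering and move each matching group key to
 * the front; the return value is the length of the already-sorted prefix
 * (which incremental sort can build on). */
static int
reorder_by_input(int group_keys[], int ngroup,
				 const int input_keys[], int ninput)
{
	int			n = 0;			/* length of the matched prefix */

	for (int i = 0; i < ninput && n < ngroup; i++)
	{
		bool		found = false;

		for (int j = n; j < ngroup; j++)
		{
			if (group_keys[j] == input_keys[i])
			{
				int			tmp = group_keys[n];

				group_keys[n] = group_keys[j];
				group_keys[j] = tmp;
				n++;
				found = true;
				break;
			}
		}

		/* the sorted prefix must be contiguous; stop at the first miss */
		if (!found)
			break;
	}
	return n;
}
```

For GROUP BY keys {3, 1, 2} and an input ordering {2, 3, 5}, this moves 2 and 3 to the front and reports a preordered prefix of length 2; the remaining key only needs an incremental sort.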

I've only quickly skimmed the old thread, but IIRC there were two main
challenges in getting the optimization right:

1) deciding which orderings are interesting / worth additional work

I think we need to consider these orderings, in addition to the one
specified in GROUP BY:

1) as specified in ORDER BY (if different from 1)

What is the idea behind considering this ordering?

I'll try to answer this in general, i.e. what I think this patch needs
to consider. Hopefully that'll answer why we should look at ORDER BY
pathkeys too ...

Reordering the pathkeys has both local and global effects, and we need
to consider all of that to make the optimization reliable, otherwise
we'll inevitably end up with trivial regressions.

The local effects are trivial - it's for example reordering the pathkeys
to make the explicit sort as cheap as possible. This thread already
discussed a number of things to base this on - ndistinct for columns,
cost of comparison function, ... In any case, this is something we can
decide locally, when building the grouping paths.
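[Editorial note: the "local" effect of ndistinct-based reordering can be
sketched outside the planner. The simulation below is hypothetical and not
part of the patch; it just counts per-column comparator calls when a
high-ndistinct column leads versus trails, mirroring the observation from
the start of the thread that a nearly-unique leading column means the
comparator almost never has to look at the following columns.]

```python
from functools import cmp_to_key
import random

comparisons = [0, 0]  # comparator invocations per column

def cmp_rows(x, y):
    # Compare column by column, like a multi-key sort comparator.
    for col, (a, b) in enumerate(zip(x, y)):
        comparisons[col] += 1
        if a != b:
            return -1 if a < b else 1
    return 0

random.seed(42)
# Column 0 is nearly unique (random floats), column 1 has only 2 values.
rows = [(random.random(), random.randint(0, 1)) for _ in range(10000)]

comparisons = [0, 0]
sorted(rows, key=cmp_to_key(cmp_rows))  # high-ndistinct column first
second_col_when_unique_first = comparisons[1]

comparisons = [0, 0]
sorted([(b, a) for (a, b) in rows], key=cmp_to_key(cmp_rows))  # low-ndistinct first
second_col_when_lowcard_first = comparisons[1]

# With the unique column first, ties on it are rare, so the second
# column is almost never examined.
print(second_col_when_unique_first, second_col_when_lowcard_first)
```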

The global effects are much harder to tackle, because the decision can't
be made locally when building the grouping paths. It requires changes
both below and above the point where we build grouping paths.

An example of a decision we need to make before we even get to building
a grouping path is which index paths to build. Currently we only build
index paths with "useful" pathkeys, and without tweaking that we'll
never even see the index in add_paths_to_grouping_rel().

But there are also decisions that can be made only after we build the
grouping paths. For example, we may have both GROUP BY and ORDER BY, and
there is no "always correct" way to combine those. In some cases it may
be correct to use the same pathkeys, in other cases it's better to use
different ones (which will require an extra Sort, with additional cost).

So I don't think there will be a single "interesting" grouping pathkeys
(i.e. root->group_pathkeys), but a collection of pathkeys. And we'll
need to build grouping paths for all of those, and leave the planner to
eventually pick the one giving us the cheapest plan ...

A "brute-force" approach would be to generate all possible orderings of
group_pathkeys, but that's probably not going to fly - it might easily
cause an explosion in number of paths we track, etc. So we'll need to
pick/prioritize orderings that are more likely to give us efficient
plans, and ORDER BY seems like a good example because it means we won't
need an explicit sort.
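[Editorial note: the reordering done by the WIP patch's
pathkeys_maybe_reorder() can be sketched in a few lines of Python. This is
a simplified model, not the C implementation: it matches keys by value
rather than by canonical PathKey pointer, but the prefix-maximizing logic
is the same.]

```python
def reorder_pathkeys(keys1, keys2):
    """Reorder keys1 so it shares the longest possible prefix with keys2.

    Model of the WIP pathkeys_maybe_reorder(): walk keys2, pulling out
    matching members of keys1 until the first miss, then append the
    remaining keys1 members in their original order.
    """
    if not keys1 or not keys2 or keys1 == keys2:
        return keys1
    prefix, matched = [], set()
    for k2 in keys2:
        if k2 in keys1:
            prefix.append(k2)
            matched.add(k2)
        else:
            break  # only a shared *prefix* is useful for (incremental) sort
    rest = [k for k in keys1 if k not in matched]
    assert len(prefix) + len(rest) == len(keys1)
    return prefix + rest

# GROUP BY (a, b, c) against a path sorted on (b, d):
print(reorder_pathkeys(['a', 'b', 'c'], ['b', 'd']))  # ['b', 'a', 'c']
```

With the reordered keys, the path already provides the leading key 'b', so
an incremental sort on the remaining keys can replace a full sort.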

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

wip-group-by-incremental-sort-v1.patchtext/plain; charset=us-asciiDownload
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f4d4a4df66..7a5a3bb73b 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -142,6 +142,7 @@ bool		enable_partitionwise_aggregate = false;
 bool		enable_parallel_append = true;
 bool		enable_parallel_hash = true;
 bool		enable_partition_pruning = true;
+bool		enable_groupby_reorder = true;
 
 typedef struct
 {
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ce9bf87e9b..9d0961dee2 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,6 +22,7 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -388,6 +389,91 @@ pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
 	return (key1 == NULL);
 }
 
+/*
+ * pathkeys_maybe_reorder
+ *    Reorder pathkeys in keys1 so that the order matches keys2 as much as
+ *    possible, i.e. to maximize the common prefix length.
+ */
+List *
+pathkeys_maybe_reorder(List *keys1, List *keys2)
+{
+	ListCell   *key1,
+			   *key2;
+	List	   *keys = NIL;
+
+	int			idx = 0;
+	Bitmapset  *matched = NULL;
+
+	if (!enable_groupby_reorder)
+		return keys1;
+
+	/*
+	 * See if we can avoid looping through both lists. This optimization
+	 * gains us several percent in planning time in a worst-case test.
+	 */
+	if (keys1 == keys2)
+	{
+		return keys1;
+	}
+	else if (keys1 == NIL)
+	{
+		return keys1;
+	}
+	else if (keys2 == NIL)
+	{
+		return keys1;
+	}
+
+	/*
+	 * First add all keys from keys2 with a match in keys1. We only care
+	 * about prefix, so we stop at the first key without a keys1 match.
+	 */
+	foreach(key2, keys2)
+	{
+		bool		found = false;
+		PathKey    *pathkey2 = (PathKey *) lfirst(key2);
+
+		idx = 0;
+		foreach(key1, keys1)
+		{
+			PathKey    *pathkey1 = (PathKey *) lfirst(key1);
+
+			if (pathkey1 == pathkey2)
+			{
+				found = true;
+				keys = lappend(keys, pathkey1);
+				matched = bms_add_member(matched, idx);
+			}
+
+			idx++;
+		}
+
+		/* No match in keys1, so we're done. */
+		if (!found)
+			break;
+	}
+
+	/* add all remaining unmatched keys from keys1 */
+	idx = 0;
+	foreach(key1, keys1)
+	{
+		PathKey    *pathkey1 = (PathKey *) lfirst(key1);
+
+		if (!bms_is_member(idx, matched))
+			keys = lappend(keys, pathkey1);
+
+		idx++;
+	}
+
+	/*
+	 * Simple cross-check that we have all the keys. It might be a good
+	 * idea to actually check that all keys from keys1 are included.
+	 */
+	Assert(list_length(keys1) == list_length(keys));
+
+	return keys;
+}
+
 /*
  * get_cheapest_path_for_pathkeys
  *	  Find the cheapest path (according to the specified criterion) that
@@ -1862,6 +1948,41 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for performing the
+ *		query's requested grouping.
+ *
+ * Because we have the possibility of incremental sort, a prefix list of
+ * keys is potentially useful for improving the performance of the grouped
+ * aggregation. Thus we return 0 if no useful keys are found, or the number
+ * of leading keys shared by the list and the grouping pathkeys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	int			n_common_pathkeys;
+	List	   *group_pathkeys;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	/*
+	 * FIXME Building the reordered list is unnecessary - we can simply
+	 * count how many keys at the start of pathkeys are also found
+	 * anywhere in root->group_pathkeys.
+	 */
+	group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, pathkeys);
+
+	(void) pathkeys_count_contained_in(group_pathkeys, pathkeys,
+									   &n_common_pathkeys);
+
+	return n_common_pathkeys;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1879,6 +2000,10 @@ truncate_useless_pathkeys(PlannerInfo *root,
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+
 	/*
 	 * Note: not safe to modify input list destructively, but we can avoid
 	 * copying the list if we're not actually going to change it
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 357850624c..3265ec400a 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6507,8 +6507,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			Path	   *path_original = path;
 			bool		is_sorted;
 			int			presorted_keys;
+			List	   *group_pathkeys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+			group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys,
+													path->pathkeys);
+
+			is_sorted = pathkeys_count_contained_in(group_pathkeys,
 													path->pathkeys,
 													&presorted_keys);
 
@@ -6519,7 +6523,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				/* Now decide what to stick atop it */
@@ -6592,7 +6596,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			path = (Path *) create_incremental_sort_path(root,
 														 grouped_rel,
 														 path,
-														 root->group_pathkeys,
+														 group_pathkeys,
 														 presorted_keys,
 														 -1.0);
 
@@ -6654,8 +6658,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				Path	   *path_original = path;
 				bool		is_sorted;
 				int			presorted_keys;
+				List	   *group_pathkeys;
+
+				group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys,
+														path->pathkeys);
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
 														path->pathkeys,
 														&presorted_keys);
 
@@ -6670,7 +6678,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6720,7 +6728,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				path = (Path *) create_incremental_sort_path(root,
 															 grouped_rel,
 															 path,
-															 root->group_pathkeys,
+															 group_pathkeys,
 															 presorted_keys,
 															 -1.0);
 
@@ -6980,6 +6988,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = NIL;
+
+			group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys,
+													path->pathkeys);
 
 			is_sorted = pathkeys_contained_in(root->group_pathkeys,
 											  path->pathkeys);
@@ -6990,7 +7002,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -7030,8 +7042,12 @@ create_partial_grouping_paths(PlannerInfo *root,
 				Path	   *path = (Path *) lfirst(lc);
 				bool		is_sorted;
 				int			presorted_keys;
+				List	   *group_pathkeys;
+
+				group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys,
+														path->pathkeys);
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
 														path->pathkeys,
 														&presorted_keys);
 
@@ -7046,7 +7062,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 				path = (Path *) create_incremental_sort_path(root,
 															 partially_grouped_rel,
 															 path,
-															 root->group_pathkeys,
+															 group_pathkeys,
 															 presorted_keys,
 															 -1.0);
 
@@ -7084,8 +7100,13 @@ create_partial_grouping_paths(PlannerInfo *root,
 			Path	   *path_original = path;
 			bool		is_sorted;
 			int			presorted_keys;
+			List	   *group_pathkeys;
+
+			group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys,
+													path->pathkeys);
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+
+			is_sorted = pathkeys_count_contained_in(group_pathkeys,
 													path->pathkeys,
 													&presorted_keys);
 
@@ -7096,7 +7117,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 
 				if (parse->hasAggs)
@@ -7145,7 +7166,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			path = (Path *) create_incremental_sort_path(root,
 														 partially_grouped_rel,
 														 path,
-														 root->group_pathkeys,
+														 group_pathkeys,
 														 presorted_keys,
 														 -1.0);
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2f3e0a70e0..50abf2344f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2060,6 +2060,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"enable_groupby_reorder", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables reordering of expressions in the GROUP BY clause."),
+		},
+		&enable_groupby_reorder,
+		true,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9710e5c0a4..ef58998409 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -67,6 +67,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate;
 extern PGDLLIMPORT bool enable_parallel_append;
 extern PGDLLIMPORT bool enable_parallel_hash;
 extern PGDLLIMPORT bool enable_partition_pruning;
+extern PGDLLIMPORT bool enable_groupby_reorder;
 extern PGDLLIMPORT int constraint_exclusion;
 
 extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 10b6e81079..65200a106f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -189,6 +189,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *pathkeys_maybe_reorder(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
#42Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Tomas Vondra (#41)
Re: POC: GROUP BY optimization

On Sat, May 16, 2020 at 04:56:09PM +0200, Tomas Vondra wrote:

So I don't think there will be a single "interesting" grouping pathkeys
(i.e. root->group_pathkeys), but a collection of pathkeys. And we'll
need to build grouping paths for all of those, and leave the planner to
eventually pick the one giving us the cheapest plan ...

A "brute-force" approach would be to generate all possible orderings of
group_pathkeys, but that's probably not going to fly - it might easily
cause an explosion in number of paths we track, etc. So we'll need to
pick/prioritize orderings that are more likely to give us efficient
plans, and ORDER BY seems like a good example because it means we won't
need an explicit sort.

Yes, I see. I've already rebased the original version of patch and now
working on your suggestions. In the meantime one question:

But there are also decisions that can be made only after we build the
grouping paths. For example, we may have both GROUP BY and ORDER BY, and
there is no "always correct" way to combine those. In some cases it may
be correct to use the same pathkeys, in other cases it's better to use
different ones (which will require an extra Sort, with additional cost).

Do I understand correctly, your idea is that in some cases it's cheaper
to use different order for GROUP BY than in ORDER BY even with an extra
sorting? In the current patch implementation there is an assumption that
says it's always better to match the order of pathkeys for both GROUP
BY/ORDER BY (which means that the only degree of freedom there is to
reorder the tail, which in turn makes both "unsorted input" and
"partially sorted input" cases from your original email essentially the
same). Can you show such an example when this assumption is invalid?

I've only quickly skimmed the old thread, but IIRC there were two main
challenges in getting the optimization right:

1) deciding which orderings are interesting / worth additional work

I think we need to consider these orderings, in addition to the one
specified in GROUP BY:

Yes, looks like the current patch implementation together with
preprocess_groupclause already implements this, though perhaps not as
flexibly.

2) costing the alternative orderings

I think we've already discussed various ways to leverage as much
available info as possible (extended stats, MCVs, ...) but I think the
patch only does some of it.

Right, there were a couple of ideas about what to do when a few groups are
so big that they invalidate the current assumptions about costs. E.g. do
not apply reordering if we detect such a situation, or use a
"conservative" approach and take the biggest group for estimations.
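[Editorial note: why skew matters for these cost estimates can be sketched
with a hypothetical back-of-the-envelope model, not taken from the patch:
approximate the work of sorting the trailing columns within each group of
the leading column as sum of n*log2(n) over group sizes n. With the same
row count and the same ndistinct, one giant group costs far more than the
uniform-groups assumption predicts.]

```python
import math

def inner_sort_cost(group_sizes):
    # Rough comparison count for sorting the remaining columns within
    # each group of the leading column: sum of n * log2(n).
    return sum(n * math.log2(n) for n in group_sizes if n > 1)

N, G = 1_000_000, 1_000
uniform = [N // G] * G                    # what a plain ndistinct estimate assumes
skewed = [N - (G - 1)] + [1] * (G - 1)    # one giant group, same N and ndistinct

print(inner_sort_cost(uniform))
print(inner_sort_cost(skewed))  # about twice the uniform estimate here
```

Taking the biggest group as the basis ("conservative" approach) bounds
this error from above at the price of overestimating the uniform case.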

#43Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#42)
Re: POC: GROUP BY optimization

On Sat, Jun 20, 2020 at 04:30:10PM +0200, Dmitry Dolgov wrote:

On Sat, May 16, 2020 at 04:56:09PM +0200, Tomas Vondra wrote:

So I don't think there will be a single "interesting" grouping pathkeys
(i.e. root->group_pathkeys), but a collection of pathkeys. And we'll
need to build grouping paths for all of those, and leave the planner to
eventually pick the one giving us the cheapest plan ...

A "brute-force" approach would be to generate all possible orderings of
group_pathkeys, but that's probably not going to fly - it might easily
cause an explosion in number of paths we track, etc. So we'll need to
pick/prioritize orderings that are more likely to give us efficient
plans, and ORDER BY seems like a good example because it means we won't
need an explicit sort.

Yes, I see. I've already rebased the original version of patch and now
working on your suggestions. In the meantime one question:

But there are also decisions that can be made only after we build the
grouping paths. For example, we may have both GROUP BY and ORDER BY, and
there is no "always correct" way to combine those. In some cases it may
be correct to use the same pathkeys, in other cases it's better to use
different ones (which will require an extra Sort, with additional cost).

Do I understand correctly, your idea is that in some cases it's cheaper
to use different order for GROUP BY than in ORDER BY even with an extra
sorting? In the current patch implementation there is an assumption that
says it's always better to match the order of pathkeys for both GROUP
BY/ORDER BY (which means that the only degree of freedom there is to
reorder the tail, which in turn makes both "unsorted input" and
"partially sorted input" cases from your original email essentially the
same). Can you show such an example when this assumption is invalid?

Yes, that is mostly the point - I don't think we can assume simply using
the ORDER BY pathkeys (if specified) for grouping is optimal. As a
trivial counter-example, consider this

CREATE TABLE t (
a int,
b int,
c int);

INSERT INTO t SELECT 1000 * random(), 1000 * random(), i
FROM generate_series(1,10000000) s(i);

CREATE INDEX ON t (a, b, c);

VACUUM ANALYZE t;

And now some simple queries (you may need to tweak the planning options
a bit, to get these plans - disable hashagg etc.).

test=# explain analyze select a, b, count(c) from t group by a, b;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.43..389008.55 rows=1000018 width=16) (actual time=0.037..10509.505 rows=1001923 loops=1)
Group Key: a, b
-> Index Only Scan using t_a_b_c_idx on t (cost=0.43..304007.06 rows=10000175 width=12) (actual time=0.024..5189.908 rows=10000000 loops=1)
Heap Fetches: 0
Planning Time: 0.113 ms
Execution Time: 10941.677 ms
(6 rows)

test=# explain analyze select a,b, count(c) from t group by a, b order by a, b;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.43..389008.55 rows=1000018 width=16) (actual time=0.033..10606.925 rows=1001923 loops=1)
Group Key: a, b
-> Index Only Scan using t_a_b_c_idx on t (cost=0.43..304007.06 rows=10000175 width=12) (actual time=0.020..5240.332 rows=10000000 loops=1)
Heap Fetches: 0
Planning Time: 0.100 ms
Execution Time: 11043.358 ms
(6 rows)

test=# explain analyze select a,b, count(c) from t group by a, b order by b, a;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=1658556.19..1768558.12 rows=1000018 width=16) (actual time=14707.676..25533.053 rows=1001923 loops=1)
Group Key: b, a
-> Sort (cost=1658556.19..1683556.63 rows=10000175 width=12) (actual time=14707.659..20011.024 rows=10000000 loops=1)
Sort Key: b, a
Sort Method: external merge Disk: 219200kB
-> Seq Scan on t (cost=0.00..154056.75 rows=10000175 width=12) (actual time=0.028..4751.244 rows=10000000 loops=1)
Planning Time: 0.103 ms
Execution Time: 25989.412 ms
(8 rows)

test=# explain analyze select * from (select a,b, count(c) from t group by a, b offset 0) foo order by b,a;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=515759.00..518259.04 rows=1000018 width=16) (actual time=11281.094..11783.920 rows=1001923 loops=1)
Sort Key: t.b, t.a
Sort Method: external merge Disk: 34880kB
-> GroupAggregate (cost=0.43..389008.55 rows=1000018 width=16) (actual time=0.064..10507.256 rows=1001923 loops=1)
Group Key: t.a, t.b
-> Index Only Scan using t_a_b_c_idx on t (cost=0.43..304007.06 rows=10000175 width=12) (actual time=0.052..5209.939 rows=10000000 loops=1)
Heap Fetches: 0
Planning Time: 0.111 ms
Execution Time: 12218.370 ms
(9 rows)

To sum this up:

grouping (a,b): 10941 ms
grouping (a,b) + ordering (a,b): 11043 ms
grouping (b,a) + ordering (b,a): 25989 ms
grouping (a,b) + ordering (b,a): 12218 ms

So, it's fast as long as we can use the IOS, even if we have to do an
extra Sort at the end. This obviously relies on the grouping to reduce
the number of rows (in this case from 10M to 1M), which makes the extra
cost much cheaper.

I'm pretty sure I could construct similar examples e.g. with incremental
sort, and so on.

Does this explain why I think we can't make the assumption that it's
always better to match the pathkeys?

FWIW, I think the change of plan for the third query (where we specify
"GROUP BY a,b" but the planner decides to just change that) is rather
annoying, and it clearly makes things worse :-(

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#44Robert Haas
robertmhaas@gmail.com
In reply to: Tomas Vondra (#41)
Re: POC: GROUP BY optimization

On Sat, May 16, 2020 at 10:56 AM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

The local effects are trivial - it's for example reordering the pathkeys
to make the explicit sort as cheap as possible. This thread already
discussed a number of things to base this on - ndistinct for columns,
cost of comparison function, ... In any case, this is something we can
decide locally, when building the grouping paths.

The global effects are much harder to tackle, because the decision can't
be made locally when building the grouping paths. It requires changes
both below and above the point where we build grouping paths.

An example of a decision we need to make before we even get to building
a grouping path is which index paths to build. Currently we only build
index paths with "useful" pathkeys, and without tweaking that we'll
never even see the index in add_paths_to_grouping_rel().

But there are also decisions that can be made only after we build the
grouping paths. For example, we may have both GROUP BY and ORDER BY, and
there is no "always correct" way to combine those. In some cases it may
be correct to use the same pathkeys, in other cases it's better to use
different ones (which will require an extra Sort, with additional cost).

So I don't think there will be a single "interesting" grouping pathkeys
(i.e. root->group_pathkeys), but a collection of pathkeys. And we'll
need to build grouping paths for all of those, and leave the planner to
eventually pick the one giving us the cheapest plan ...

I agree with all of this and I think it's really good analysis. Part
of the reason why the planner isn't that sophisticated in this area is
that, for a long time, we didn't use paths at this level, and so it
was much harder to write any kind of viable patch to consider
alternatives. With Tom's planner path-ification work there should be a
lot more potential for optimization here, but, as you say, we need to
do that by leveraging the existing costing machinery, not just via
simple heuristics. It also strikes me that one of the problems in this
area is that the statistics we currently gather don't seem to be
entirely useful or reliable for aggregate planning. I wonder if there
are extensions to the extended statistics mechanism, or even just more
things we should gather during a routine ANALYZE, that would enable us
to estimate things better here. The most obvious thing is that
n_distinct is often wildly inaccurate, but it's deeper than that. For
instance, we need some way to estimate how many groups you're going to
get when you filter on a and then group by b that doesn't assume
uniform distribution. And we also need something that doesn't
just output a number of groups, but gives you some inkling of the
sizes of those groups: 1 giant group and a bunch of little ones isn't
the same as a bunch of equally sized groups. I don't know that these
are things we really have much chance of figuring out with the
currently-available information, though.
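[Editorial note: the "filter on a, then group by b" problem can be
illustrated with a hypothetical toy dataset where b is functionally
dependent on a. A uniformity/independence assumption badly overestimates
the number of surviving groups; this sketch is illustrative only and does
not model the planner's actual formula.]

```python
# b is functionally dependent on a: b = a // 10
rows = [(a, a // 10) for a in range(10000)]

d = len({b for _, b in rows})               # 1000 distinct values of b overall
filtered = [b for a, b in rows if a < 100]  # WHERE a < 100
k = len(filtered)

actual = len(set(filtered))                 # only 10 groups survive the filter
# Expected distinct values if b were uniform and independent of a:
uniform_est = d * (1 - (1 - 1 / d) ** k)

print(actual, round(uniform_est))           # the estimate is ~10x too high
```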

Sorry if this is hijacking the thread a bit; I don't mean to
discourage work on this specific patch. I'm just wondering if we need
to think a little bigger to see our way to a good solution.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#45Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Robert Haas (#44)
Re: POC: GROUP BY optimization

On Mon, Jun 22, 2020 at 11:50:49AM -0400, Robert Haas wrote:

On Sat, May 16, 2020 at 10:56 AM Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

The local effects are trivial - it's for example reordering the
pathkeys to make the explicit sort as cheap as possible. This thread
already discussed a number of things to base this on - ndistinct for
columns, cost of comparison function, ... In any case, this is
something we can decide locally, when building the grouping paths.

The global effects are much harder to tackle, because the decision
can't be made locally when building the grouping paths. It requires
changes both below and above the point where we build grouping paths.

An example of a decision we need to make before we even get to
building a grouping path is which index paths to build. Currently we
only build index paths with "useful" pathkeys, and without tweaking
that we'll never even see the index in add_paths_to_grouping_rel().

But there are also decisions that can be made only after we build the
grouping paths. For example, we may have both GROUP BY and ORDER BY,
and there is no "always correct" way to combine those. In some cases
it may be correct to use the same pathkeys, in other cases it's
better to use different ones (which will require an extra Sort, with
additional cost).

So I don't think there will be a single "interesting" grouping
pathkeys (i.e. root->group_pathkeys), but a collection of pathkeys.
And we'll need to build grouping paths for all of those, and leave
the planner to eventually pick the one giving us the cheapest plan
...

I agree with all of this and I think it's really good analysis. Part
of the reason why the planner isn't that sophisticated in this area is
that, for a long time, we didn't use paths at this level, and so it
was much harder to write any kind of viable patch to consider
alternatives. With Tom's planner path-ification work there should be
a lot more potential for optimization here, but, as you say, we need
to do that by leveraging the existing costing machinery, not just via
simple heuristics.

Agreed.

It also strikes me that one of the problems in this area is that the
statistics we currently gather don't seem to be entirely useful or
reliable for aggregate planning.

I don't think that's an immediate issue. I don't think anyone started
implementing these optimizations and decided not to do that because of
lack of statistics.

The reason why we don't have those optimizations is more that we decided
not to even consider those optimizations, possibly because of planner
limitations. Of course, the optimizations may require additional stats,
but that's only step #2.

I wonder if there are extensions to the extended statistics mechanism,
or even just more things we should gather during a routine ANALYZE,
that would enable us to estimate things better here. The most obvious
thing is that n_distinct is often wildly inaccurate, but it's deeper
than that. For instance, we need some way to estimate how many groups
you're going to get when you filter on a and then group by b that
doesn't assume uniform distribution. And we also need something
that doesn't just output a number of groups, but gives you some inkling
of the sizes of those groups: 1 giant group and a bunch of little ones
isn't the same as a bunch of equally sized groups. I don't know that
these are things we really have much chance of figuring out with the
currently-available information, though.

Not sure. I think the extended stats we have now (ndistinct coeffs,
multi-column MCV lists) should be good basis for these decisions, even
with skewed data. Of course, we may need to add something else, but I'm
not sure what would that be.

The main limitation of extended stats is it's at the per-table level.
Once you do a join, it's all over - we don't have that capability :-(

Sorry if this is hijacking the thread a bit; I don't mean to
discourage work on this specific patch. I'm just wondering if we need
to think a little bigger to see our way to a good solution.

I think it's fine continuing on this optimization. Either we'll be able
to get it done with existing stats, or we'll find what other stats we
need to sufficiently good heuristics.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#46Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Tomas Vondra (#39)
1 attachment(s)
Re: POC: GROUP BY optimization

I wonder if anyone has plans to try again with this optimization in v14
cycle? The patches no longer apply thanks to the incremental sort patch,
but I suppose fixing that should not be extremely hard.

I found this feature interesting and I'd like to join.

PFA the updated version of the patch made fully compatible with changes in
v13 and I suppose it is ready to be reviewed for v14.

Main things changed:
1. Patch is made compatible with new lists representation ( 1cff1b95ab6d )
2. Patch is made compatible with the incremental sort ( d2d8a229bc58 and
ba3e76cc571e )
3. Tests are changed to represent changes in keys numbering and
naming convention in the plan ( 55a1954da16 and 6ef77cf46e8 )

As always your ideas are very much welcome!

--
Best regards,
Pavel Borisov

Postgres Professional: http://postgrespro.com

Attachments:

v11-0001-GROUP-BY-optimization-made-compatible-with-chang.patchapplication/octet-stream; name=v11-0001-GROUP-BY-optimization-made-compatible-with-chang.patchDownload
From 5d9c6119c19210714124eb6aa302dae00d6d19b9 Mon Sep 17 00:00:00 2001
From: Pavel Borisov <pashkin.elfe@gmail.com>
Date: Mon, 26 Oct 2020 11:47:24 +0400
Subject: [PATCH v11] GROUP BY optimization made compatible with changes in PG
 v13

    1. with changes in naming and numbering of keys in a plan
     ( 55a1954da16 and 6ef77cf46e8 )
    2. with list processing functions changes
     ( 1cff1b95ab6d )
    3. with incremental sort
     ( d2d8a229bc58 and ba3e76cc571e )
---
 .../postgres_fdw/expected/postgres_fdw.out    | 199 +++++++---
 src/backend/optimizer/path/costsize.c         | 333 +++++++++++++++--
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 347 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 124 +++++--
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  35 +-
 src/backend/utils/misc/guc.c                  |  29 ++
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  13 +
 src/include/utils/selfuncs.h                  |   3 +
 src/test/regress/expected/aggregates.out      | 239 +++++++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 ++-
 .../regress/expected/partition_aggregate.out  | 136 ++++---
 src/test/regress/expected/partition_join.out  |  79 ++--
 src/test/regress/expected/union.out           |  60 ++-
 src/test/regress/sql/aggregates.sql           |  97 +++++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 19 files changed, 1468 insertions(+), 300 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2d88d06358..b6427f323e 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1048,13 +1048,15 @@ ANALYZE ft5;
 -- join two tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                       QUERY PLAN                                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                        QUERY PLAN                                                                                        
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
  c1  | c1  
@@ -1074,13 +1076,15 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join three tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
-                                                                                                                                   QUERY PLAN                                                                                                                                    
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                     QUERY PLAN                                                                                                                     
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c2, t3.c3, t1.c3
-   Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
-   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c2, t3.c3, t1.c3
+         Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
+         Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
  c1 | c2 |   c3   
@@ -1687,13 +1691,33 @@ ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
 -- tests whole-row reference for row marks
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
  c1  | c1  
@@ -1712,13 +1736,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
-                                                                                                                                                                                                                                    QUERY PLAN                                                                                                                                                                                                                                    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                        QUERY PLAN                                                                                                                                                                                                                        
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1 FOR UPDATE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
  c1  | c1  
@@ -1738,13 +1782,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join two tables with FOR SHARE clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                           
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                               
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
  c1  | c1  
@@ -1763,13 +1827,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
-                                                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                                                   
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                       QUERY PLAN                                                                                                                                                                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1 FOR SHARE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
  c1  | c1  
@@ -1823,13 +1907,15 @@ WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t
 -- ctid with whole-row reference
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                                                                                                                                  QUERY PLAN                                                                                                                                                                                                                   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                    
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 -- SEMI JOIN, not pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
@@ -1900,13 +1986,15 @@ SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2
 -- CROSS JOIN can be pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                            QUERY PLAN                                                                             
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
  c1 | c1  
@@ -2610,16 +2698,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 733f7ea543..11e56fdef2 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1656,6 +1656,289 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0 that would cause estimate_num_groups() to fail
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea: longer values are more expensive to compare. The return
+ * value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in PostgreSQL is passed as a Datum, so no operation on a value
+	 * can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of comparison is likely not directly proportional to the args'
+	 * width: the compared values may have different widths (we know only the
+	 * column average) and the result can often be determined from the first
+	 * few bytes. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys.  In this case, it falls back to a
+ * simple comparison cost estimate.
+ *
+ * Estimation algorithm is based on ideas from course Algorithms,
+ * Robert Sedgewick, Kevin Wayne, https://algs4.cs.princeton.edu/home/ and paper
+ * "Quicksort Is Optimal For Many Equal Keys", Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017.
+ *
+ * In terms of those works, let N be the number of tuples and Xi the number of
+ * tuples with key Ki; then the estimate is:
+ * log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ * In our case all Xi are equal, because we only have an estimate of the number
+ * of groups, not of the group sizes; the formula then becomes
+ * N * log(NumberOfGroups).  Next, to correctly estimate a multi-column sort we
+ * compute each column separately: with k the column number and Gk the number
+ * of groups defined by the first k columns, the estimate is:
+ * N * sum( Fk * log(Gk) )
+ * where Fk is the comparison function cost (including width) for column k.
+ */
+
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume the equivalence members aren't very different, so to
+		 * estimate the cost we take just the first member
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* don't look up funcCost if the data type is the same as before */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take into account actual width fee */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* remember if we have a fake Var in the pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Prevent calling estimate_num_groups() with a fake Var.  Note that
+		 * pathkeyExprs contains this column and all previous ones
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Incrementally compute the number of groups within the groups
+			 * from the previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT, because it applies to a single
+			 * column while here we are estimating the number of groups over
+			 * a set of columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) *
+				list_length(pathkeyExprs) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison cost, but are
+		 * still checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				if (tuplesPerPrevGroup < output_tuples)
+					/* comparing only inside output_tuples */
+					correctedNGroups =
+						ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups));
+				else
+					/* two groups - in output and out */
+					correctedNGroups = 2.0;
+			}
+			else
+				correctedNGroups = nGroups;
+
+			if (correctedNGroups <= 1.0)
+				correctedNGroups = 2.0;
+			else
+				correctedNGroups = ceil(correctedNGroups);
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions are rarely uniform, but we have no way to
+		 * determine that, so add a multiplier to get closer to the worst case.
+		 */
+		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+
+		/*
+		 * We can skip all remaining columns in the cost estimate, because we
+		 * believe the tuples are already unique on the columns seen so far
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort cost
+	 * formula has an additional term proportional to the number of tuples (see
+	 * Chapter 8.2 and Theorem 4.1).  It matters for low tuple counts,
+	 * approximately below 1e4.  It could be folded in as an additional
+	 * multiplier under the logarithm, but that requires a more complicated
+	 * formula involving the number of unique tuples, and it isn't clear how to
+	 * combine such a multiplier with groups.  Estimate it as 10 in
+	 * cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper for estimating the cost of the best sort path
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1672,7 +1955,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1691,9 +1974,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
+ * be added by caller later.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1710,9 +1995,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1736,12 +2018,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1757,18 +2037,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1801,8 +2080,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1886,7 +2165,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1894,7 +2173,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -1903,7 +2182,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -1943,7 +2222,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2041,7 +2320,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2109,7 +2388,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 690b753369..16a0648e13 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -679,7 +679,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That is possible if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ce9bf87e9b..b900b84d3f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,312 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the given pathkeys.  The
+ * function returns new lists; the original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical and share the same allocated struct, so
+		 * pointer equality is enough
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+typedef struct MutatorState {
+	List		*elemsList;
+	ListCell	**elemCells;
+	void		**elems;
+	int			*positions;
+	int			 mutatorNColumns;
+	int			 count;
+} MutatorState;
+
+static void
+initMutator(MutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+		if (i >= n)
+			break;
+	}
+}
+
+static void
+swap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+static bool
+getNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+	swap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+	while (l < r)
+		swap(a, l++, r--);
+
+
+	return true;
+}
+
+static List*
+doMutator(MutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first set is original set */
+	if (state->count == 1)
+		return state->elemsList;
+
+	if (getNextSet(state->positions, state->mutatorNColumns) == false)
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	for(i=0; i<state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void*) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+typedef struct {
+	Cost cost;
+	PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+/*
+ * Order the tail of the group pathkeys list by descending uniqueness to speed
+ * up sorting.  Returns newly allocated lists; the old ones stay untouched.
+ * n_preordered is the length of the list head whose order must be preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	MutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+	int i;
+
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Try to match the ORDER BY pathkeys, in the hope that one sort is cheaper
+	 * than two
+	 */
+	if (debug_group_by_match_order_by &&
+		n_preordered == 0 && root->sort_pathkeys)
+	{
+		bool _save_debug_group_by_reorder_by_pathkeys =
+			debug_group_by_reorder_by_pathkeys;  /* DEBUG ONLY, to be removed */
+
+		debug_group_by_reorder_by_pathkeys = true;
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
+		debug_group_by_reorder_by_pathkeys = _save_debug_group_by_reorder_by_pathkeys;
+
+		if (list_length(*group_pathkeys) - n_preordered < 2)
+			return; /* nothing to do */
+	}
+
+	/*
+	 * Try all permutations of at most 4 cheapest pathkeys.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		/*
+		 * Sort the remaining pathkeys cheapest first.
+		 */
+		PathkeySortCost *costs = palloc(sizeof(*costs) * nFreeKeys);
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/* Construct the sorted list. First, the preordered pathkeys. */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		/* The rest, ordered by increasing cost */
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() can modify the list's elements, so copy the
+		 * original list to a new one that can then be freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	initMutator(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = doMutator(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/*
+	 * Reorder the clauses to match the new pathkey order
+	 */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2171,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we don't care about the actual order of
+ * the pathkeys
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2218,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2256,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 986d7a52e3..5f9dd2835c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6523,21 +6523,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups = 0;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Prevent group pathkey reordering; just check whether the
+				 * path's pathkeys match the group pathkeys in order
+				 */
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
+												  path->pathkeys,
+												  &presorted_keys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
+
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6557,14 +6585,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
 				}
-				else if (parse->groupClause)
+				else if (group_clauses)
 				{
 					/*
 					 * We have GROUP BY without aggregation or grouping sets.
@@ -6574,7 +6602,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6670,11 +6698,18 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
 				bool		is_sorted;
-				int			presorted_keys;
+				int			presorted_keys = 0;
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
 														path->pathkeys,
 														&presorted_keys);
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
@@ -6684,10 +6719,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6697,9 +6737,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6708,7 +6748,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 
@@ -6973,18 +7013,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys)) ||
+				pathkeys_contained_in(group_pathkeys, path->pathkeys);
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6992,9 +7046,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -7003,7 +7057,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -7076,21 +7130,37 @@ create_partial_grouping_paths(PlannerInfo *root,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+			is_sorted = pathkeys_count_contained_in(group_pathkeys,
 													path->pathkeys,
 													&presorted_keys);
 
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
+
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -7098,9 +7168,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -7109,7 +7179,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5281a2f998..5bef994eeb 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1315,7 +1315,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bec357fcef..3e61b7ac65 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3292,7 +3292,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding grouping columns one at a time
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3360,11 +3363,20 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/*
 	 * We don't ever want to return an estimate of zero groups, as that tends
@@ -3391,7 +3403,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3400,6 +3412,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* this expression was already processed on a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* keep 'i' in sync with the grouping-set check below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3461,7 +3481,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3478,6 +3502,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a62d64eaa4..75debf981f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2036,6 +2036,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable reordering of GROUP BY keys to match pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable matching the GROUP BY order to the ORDER BY clause."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("find the cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 6141654e47..22ac1c3522 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -107,7 +107,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 10b6e81079..fc7d698dca 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -189,6 +189,19 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 7ac4a06391..05e8bd9f3e 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -195,6 +195,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
 
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset);
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					List **cache_varinfos, int prevNExprs);
 
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 477fd1205c..127901cf2b 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1193,7 +1193,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1216,10 +1217,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2401,6 +2400,236 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index e376ea1276..19b07d07f1 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1411,7 +1411,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..7db55d25f5 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1932,8 +1932,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -1967,8 +1967,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4433,18 +4433,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6026,44 +6028,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 45c698daf4..9457854886 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 0057f41caa..e3fdfb3e31 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
@@ -1339,7 +1328,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1483,7 +1472,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 6e72e92d80..17629ac4de 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -923,24 +923,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -959,24 +957,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 54f5cf7ecc..d73d8a3c00 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -991,6 +991,103 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 9c040c90e6..179cec2e7d 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -206,7 +206,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.28.0
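
The frequency-based reordering exercised by the new aggregates tests above rests
on a simple observation: if the leading sort column has many distinct values,
the comparator rarely needs to examine the remaining columns. A toy Python
model of that effect (the column names mirror the `btg` test table; the
counting harness is ours, purely for illustration, not the patch's code):

```python
import random
from functools import cmp_to_key

def count_column_comparisons(rows, key_order):
    """Sort rows by the given column order and count how many
    individual per-column comparisons the comparator performs."""
    counter = [0]

    def cmp(a, b):
        for col in key_order:
            counter[0] += 1
            if a[col] != b[col]:
                return -1 if a[col] < b[col] else 1
        return 0  # full tie: every column was compared

    sorted(rows, key=cmp_to_key(cmp))
    return counter[0]

# Model of the btg test table: p has ~5000 distinct values, v only 2.
rows = [{"p": i // 2, "v": i % 2} for i in range(10000)]
random.seed(0)
random.shuffle(rows)

print(count_column_comparisons(rows, ["p", "v"]))  # high-cardinality first
print(count_column_comparisons(rows, ["v", "p"]))  # low-cardinality first
```

Leading with the high-cardinality column `p` resolves most comparisons on the
first column, while leading with the two-valued `v` ties about half the time
and has to fall through to `p`, which is exactly why the patch prefers to order
GROUP BY columns by decreasing number of distinct values.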

#47Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Pavel Borisov (#46)
Re: POC: GROUP BY optimization

On Mon, Oct 26, 2020 at 12:44:58PM +0400, Pavel Borisov wrote:

I wonder if anyone has plans to try again with this optimization in v14
cycle? The patches no longer apply thanks to the incremental sort patch,
but I suppose fixing that should not be extremely hard.

I found this feature interesting and I'd like to join.

PFA the updated version of the patch made fully compatible with changes in
v13 and I suppose it is ready to be reviewed for v14.

Main things changed:
1. Patch is made compatible with the new List representation ( 1cff1b95ab6d )
2. Patch is made compatible with incremental sort ( d2d8a229bc58 and
ba3e76cc571e )
3. Tests are changed to reflect changes in key numbering and naming
conventions in the plan ( 55a1954da16 and 6ef77cf46e8 )

As always your ideas are very much welcome!

Hi,

Thanks for your interest! FYI there is a new thread about this topic [1]
with the next version of the patch and more commentaries (I've created
it for visibility purposes, but probably it also created some confusion,
sorry for that).

[1]: /messages/by-id/CA+q6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag@mail.gmail.com

#48Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Dmitry Dolgov (#47)
Re: POC: GROUP BY optimization

Thanks for your interest! FYI there is a new thread about this topic [1]
with the next version of the patch and more commentaries (I've created
it for visibility purposes, but probably it also created some confusion,
sorry for that).

Thanks!

I took a very quick look at your updates and noticed that the patch is
intended to be simple, and that some parts of the code were removed because
they had little test coverage. I'd propose the opposite: increase test
coverage so we can keep the more precise cost calculation and probably
partial grouping.

Or maybe it's worth benchmarking both patches and then re-deciding whether
we want the more complicated or the simpler version.

Good to know that this feature is not stuck anymore and we have more than
one proposal.
Thanks!

--
Best regards,
Pavel Borisov

Postgres Professional: http://postgrespro.com

#49Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Pavel Borisov (#48)
Re: POC: GROUP BY optimization

On Mon, Oct 26, 2020 at 01:28:59PM +0400, Pavel Borisov wrote:

Thanks for your interest! FYI there is a new thread about this topic [1]
with the next version of the patch and more commentaries (I've created
it for visibility purposes, but probably it also created some confusion,
sorry for that).

Thanks!

I took a very quick look at your updates and noticed that the patch is
intended to be simple, and that some parts of the code were removed because
they had little test coverage. I'd propose the opposite: increase test
coverage so we can keep the more precise cost calculation and probably
partial grouping.

Or maybe it's worth benchmarking both patches and then re-deciding whether
we want the more complicated or the simpler version.

Good to know that this feature is not stuck anymore and we have more than
one proposal.
Thanks!

Just to clarify, the patch that I've posted in the other thread mentioned
above is not an alternative proposal, but a development of the same patch
I had posted in this thread. As mentioned in [1], the reduced functionality
is an attempt to narrow the scope, and as soon as the base functionality
looks good enough the rest will be added back.

[1]: /messages/by-id/CA+q6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag@mail.gmail.com

#50Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: Dmitry Dolgov (#49)
Re: POC: GROUP BY optimization

On Mon, Oct 26, 2020 at 11:40:40AM +0100, Dmitry Dolgov wrote:

On Mon, Oct 26, 2020 at 01:28:59PM +0400, Pavel Borisov wrote:

Thanks for your interest! FYI there is a new thread about this topic [1]
with the next version of the patch and more commentaries (I've created
it for visibility purposes, but probably it also created some confusion,
sorry for that).

Thanks!

I took a very quick look at your updates and noticed that the patch is
intended to be simple, and that some parts of the code were removed because
they had little test coverage. I'd propose the opposite: increase test
coverage so we can keep the more precise cost calculation and probably
partial grouping.

Or maybe it's worth benchmarking both patches and then re-deciding whether
we want the more complicated or the simpler version.

Good to know that this feature is not stuck anymore and we have more than
one proposal.
Thanks!

Just to clarify, the patch that I've posted in the other thread mentioned
above is not an alternative proposal, but a development of the same patch
I had posted in this thread. As mentioned in [1], the reduced functionality
is an attempt to narrow the scope, and as soon as the base functionality
looks good enough the rest will be added back.

I find it hard to follow two similar threads trying to do the same (or
very similar) things in different ways. Is there any chance to join
forces and produce a single patch series merging the changes? With the
"basic" functionality at the beginning, then patches with the more
complex stuff. That's the usual way, I think.

As I said in my response on the other thread [1], I think constructing
additional paths with alternative orderings of pathkeys is the right
approach. Otherwise we can't really deal with optimizations above the
place where we consider this optimization.

That's essentially what I was trying to explain in my May 16 response [2],
when I actually said this:

So I don't think there will be a single "interesting" grouping
pathkeys (i.e. root->group_pathkeys), but a collection of pathkeys.
And we'll need to build grouping paths for all of those, and leave
the planner to eventually pick the one giving us the cheapest plan.

I wouldn't go as far as saying the approach in this patch (i.e. picking
one particular ordering) is doomed, but it's going to be very hard to
make it work reliably. Even if we get the costing *at this node* right,
who knows how it'll affect costing of the nodes above us?
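
Tomas's point can be illustrated with a toy cost model: pick the grouping
ordering greedily by local sort cost alone, versus keeping every candidate
ordering as a separate path and costing the plan above it too. All the numbers
and cost functions below are invented purely for illustration; they are not the
planner's actual costing:

```python
from itertools import permutations

GROUP_COLS = ("a", "b", "c")
NDISTINCT = {"a": 1000, "b": 10, "c": 2}   # hypothetical column statistics
QUERY_ORDER_BY = ("b", "a", "c")           # ordering required above the aggregate

def local_cost(ordering):
    # Crude stand-in for sort cost: high-cardinality columns first make
    # the comparator cheaper (cf. the frequency-based reordering idea).
    return sum(w / NDISTINCT[col] for w, col in zip((100, 10, 1), ordering))

def total_cost(ordering):
    # If the grouping output already matches the ORDER BY, the upper Sort
    # node disappears; otherwise it adds a fixed re-sort cost.
    resort = 0 if ordering == QUERY_ORDER_BY else 50
    return local_cost(ordering) + resort

candidates = list(permutations(GROUP_COLS))
greedy = min(candidates, key=local_cost)   # one ordering picked at this node
best = min(candidates, key=total_cost)     # planner picks among all paths
print(greedy, best)
```

Here the greedy choice and the overall winner differ: the locally cheapest
sort loses once the re-sort above it is accounted for, which is the argument
for generating multiple pathkey orderings as competing paths.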

So if I can suggest something, I'd merge the two patches, adopting the
path-based approach. With the very basic functionality/costing in the
first patch, and the more advanced stuff in additional patches.

Does that make sense?

regards

[1]: /messages/by-id/20200901210743.lutgvnfzntvhuban@development

[2]: /messages/by-id/20200516145609.vm7nrqy7frj4ha6r@development

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#51Dmitry Dolgov
9erthalion6@gmail.com
In reply to: Tomas Vondra (#50)
Re: POC: GROUP BY optimization

On Tue, Oct 27, 2020 at 09:19:51PM +0100, Tomas Vondra wrote:
On Mon, Oct 26, 2020 at 11:40:40AM +0100, Dmitry Dolgov wrote:

On Mon, Oct 26, 2020 at 01:28:59PM +0400, Pavel Borisov wrote:

Thanks for your interest! FYI there is a new thread about this topic [1]
with the next version of the patch and more commentaries (I've created
it for visibility purposes, but probably it also created some confusion,
sorry for that).

Thanks!

I took a very quick look at your updates and noticed that the patch is
intended to be simple, and that some parts of the code were removed because
they had little test coverage. I'd propose the opposite: increase test
coverage so we can keep the more precise cost calculation and probably
partial grouping.

Or maybe it's worth benchmarking both patches and then re-deciding whether
we want the more complicated or the simpler version.

Good to know that this feature is not stuck anymore and we have more than
one proposal.
Thanks!

Just to clarify, the patch that I've posted in the other thread mentioned
above is not an alternative proposal, but a development of the same patch
I had posted in this thread. As mentioned in [1], the reduced functionality
is an attempt to narrow the scope, and as soon as the base functionality
looks good enough the rest will be added back.

I find it hard to follow two similar threads trying to do the same (or
very similar) things in different ways. Is there any chance to join
forces and produce a single patch series merging the changes? With the
"basic" functionality at the beginning, then patches with the more
complex stuff. That's the usual way, I think.

As I said in my response on the other thread [1], I think constructing
additional paths with alternative orderings of pathkeys is the right
approach. Otherwise we can't really deal with optimizations above the
place where we consider this optimization.

That's essentially what I was trying to explain in my May 16 response [2],
when I actually said this:

So I don't think there will be a single "interesting" grouping
pathkeys (i.e. root->group_pathkeys), but a collection of pathkeys.
And we'll need to build grouping paths for all of those, and leave
the planner to eventually pick the one giving us the cheapest plan.

I wouldn't go as far as saying the approach in this patch (i.e. picking
one particular ordering) is doomed, but it's going to be very hard to
make it work reliably. Even if we get the costing *at this node* right,
who knows how it'll affect costing of the nodes above us?

So if I can suggest something, I'd merge the two patches, adopting the
path-based approach. With the very basic functionality/costing in the
first patch, and the more advanced stuff in additional patches.

Does that make sense?

Yes, and from what I understand that is already what has happened in the
newer thread [1]. To avoid any confusion, there are no "two patches", at
least from my side; what I've posted in [1] is the continuation of this
work, but with the path-based approach adopted and a bit less
functionality (essentially I've dropped everything that was not covered
by tests in the original patch).

In case I'm missing something and Pavel's proposal is significantly
different from the original patch (if I understand correctly, the latest
patch posted here is a rebase adjusting the old patch to the latest
changes in master, right?), then indeed they could be merged, but please
in the newer thread [1].

[1]: /messages/by-id/CA+q6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag@mail.gmail.com

#52Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Dmitry Dolgov (#51)
Re: POC: GROUP BY optimization

In case I'm missing something and Pavel's proposal is significantly
different from the original patch (if I understand correctly, the latest
patch posted here is a rebase adjusting the old patch to the latest
changes in master, right?), then indeed they could be merged, but please
in the newer thread [1].

Sure, the only difference between my patch and Teodor's previous code is
compatibility with v13, though that difference is not small, and I
appreciate the changes in path processing. The only thing I noticed is
that some useful changes I mentioned before are discarded now, but as
long as they are planned to be put back later, that is completely fine.
I'm happy to discuss this in either thread, though I don't quite
understand the reason for the switch.

Either way, I don't see a problem.

--
Best regards,
Pavel Borisov

Postgres Professional: http://postgrespro.com

#53Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Pavel Borisov (#52)
1 attachment(s)
Re: POC: GROUP BY optimization

On Thu, Oct 29, 2020 at 8:15 PM Pavel Borisov <pashkin.elfe@gmail.com>
wrote:

In case I'm missing something and Pavel's proposal is significantly
different from the original patch (if I understand correctly, the latest
patch posted here is a rebase adjusting the old patch to the latest
changes in master, right?), then indeed they could be merged, but please
in the newer thread [1].

Sure, the only difference between my patch and Teodor's previous code is
compatibility with v13, though that difference is not small, and I
appreciate the changes in path processing. The only thing I noticed is
that some useful changes I mentioned before are discarded now, but as
long as they are planned to be put back later, that is completely fine.
I'm happy to discuss this in either thread, though I don't quite
understand the reason for the switch.

Either way, I don't see a problem.

--
Best regards,
Pavel Borisov

Postgres Professional: http://postgrespro.com

The stats_ext regression test is failing because the table created in the
aggregates test case is never dropped. It's a minor change, so the
attached patch fixes that issue.

https://cirrus-ci.com/task/6704792446697472

--
Ibrar Ahmed

Attachments:

v12-0001-GROUP-BY-optimization-made-compatible-with-chang.patchapplication/octet-stream; name=v12-0001-GROUP-BY-optimization-made-compatible-with-chang.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0649b6b81c..5209d89b32 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1077,13 +1077,15 @@ ANALYZE ft5;
 -- join two tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                       QUERY PLAN                                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                        QUERY PLAN                                                                                        
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
  c1  | c1  
@@ -1103,13 +1105,15 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join three tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
-                                                                                                                                   QUERY PLAN                                                                                                                                    
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                     QUERY PLAN                                                                                                                     
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c2, t3.c3, t1.c3
-   Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
-   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c2, t3.c3, t1.c3
+         Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
+         Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
  c1 | c2 |   c3   
@@ -1716,13 +1720,33 @@ ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
 -- tests whole-row reference for row marks
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
  c1  | c1  
@@ -1741,13 +1765,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
-                                                                                                                                                                                                                                    QUERY PLAN                                                                                                                                                                                                                                    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                        QUERY PLAN                                                                                                                                                                                                                        
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1 FOR UPDATE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
  c1  | c1  
@@ -1767,13 +1811,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join two tables with FOR SHARE clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                           
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                               
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
  c1  | c1  
@@ -1792,13 +1856,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
-                                                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                                                   
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                       QUERY PLAN                                                                                                                                                                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1 FOR SHARE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
  c1  | c1  
@@ -1852,13 +1936,15 @@ WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t
 -- ctid with whole-row reference
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                                                                                                                                  QUERY PLAN                                                                                                                                                                                                                   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                    
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 -- SEMI JOIN, not pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
@@ -1929,13 +2015,15 @@ SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2
 -- CROSS JOIN can be pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                            QUERY PLAN                                                                             
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
  c1 | c1  
@@ -2639,16 +2727,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a25b674a19..1639258aaf 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1751,6 +1751,289 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0; those would cause estimate_num_groups() to fail
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea: longer values are more expensive to compare. The return
+ * value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Values in PostgreSQL are passed as Datums, so no operation on a value
+	 * can be cheaper than an operation on a Datum
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of a comparison is not directly proportional to the arguments'
+	 * width, because the arguments may differ in width (we know only the
+	 * average over the column) and the difference can often be detected from
+	 * the first few bytes. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys.  In this case, it falls back to a
+ * simple comparison cost estimate.
+ *
+ * Estimation algorithm is based on ideas from course Algorithms,
+ * Robert Sedgewick, Kevin Wayne, https://algs4.cs.princeton.edu/home/ and paper
+ * "Quicksort Is Optimal For Many Equal Keys", Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017.
+ *
+ * In terms of those sources, let N be the number of tuples and Xi the number
+ * of tuples with key Ki; then the estimate is:
+ * log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ * In our case all Xi are the same, because we only have an estimate of the
+ * number of groups, not of group sizes. The formula then becomes:
+ * N * log(NumberOfGroups). Next, to correctly estimate a multi-column sort we
+ * compute each column separately; let k be a column number and Gk the number
+ * of groups defined by the first k columns:
+ * N * sum( Fk * log(Gk) )
+ * where Fk is the comparison function cost (including width) for k columns.
+ */
+
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we'll use a bounded heap-sort keeping just K tuples in memory, for
+		 * a total number of tuple comparisons of N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account:
+	 * - per column comparison function cost
+	 * - we try to compute needed number of comparison per column
+	 */
+
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume the equivalence members aren't very different, so to
+		 * estimate the cost we take just the first member
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take into account actual width fee */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* remember if we have a fake var in the pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Avoid calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the previous columns
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Recursively compute the number of groups per group of the previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * If there are no stats, estimate using the geometric mean.
+			 * Don't use DEFAULT_NUM_DISTINCT because it applies to a single
+			 * column, while here we try to estimate the number of groups
+			 * over a set of columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) *
+				list_length(pathkeyExprs) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison but are still
+		 * checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				if (tuplesPerPrevGroup < output_tuples)
+					/* comparing only inside output_tuples */
+					correctedNGroups =
+						ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups));
+				else
+					/* two groups - in output and out */
+					correctedNGroups = 2.0;
+			}
+			else
+				correctedNGroups = nGroups;
+
+			if (correctedNGroups <= 1.0)
+				correctedNGroups = 2.0;
+			else
+				correctedNGroups = ceil(correctedNGroups);
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but for now we have no way
+		 * to determine that, so add a multiplier to get closer to the worst case.
+		 */
+		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+
+		/*
+		 * We can skip all following columns in the cost estimate, because we
+		 * assume tuples are unique over the set of previous columns
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). It matters for low numbers of
+	 * tuples, approximately below 1e4. It could be implemented as an
+	 * additional multiplier under the logarithm, but that needs a more
+	 * complicated formula taking into account the number of unique tuples,
+	 * and it isn't clear how to combine that multiplier with groups. Estimate
+	 * it as 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper to estimate the cost of the best sort path
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1767,7 +2050,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1786,9 +2069,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input.  If there is an "input cost",
+ * the caller should add it later.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1805,9 +2090,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1831,12 +2113,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1852,18 +2132,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1896,8 +2175,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1981,7 +2260,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1989,7 +2268,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -1998,7 +2277,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2038,7 +2317,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2136,7 +2415,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2204,7 +2483,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 0188c1e9a1..9e3e02a0a8 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -680,7 +680,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That's possible if
+				 * the EC was constructed from a WHERE clause, i.e. it has no
+				 * target reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index bd9a176d7d..7beb32488a 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,312 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * Reorder GROUP BY pathkeys and clauses to match the given pathkeys; returns
+ * the number of matched keys. The original GROUP BY lists stay untouched.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each input pathkey, try to find the corresponding GROUP BY pathkey
+	 * and clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical, i.e. they share the same allocated struct,
+		 * so equality of pointers is enough.
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+typedef struct MutatorState {
+	List		*elemsList;
+	ListCell	**elemCells;
+	void		**elems;
+	int			*positions;
+	int			 mutatorNColumns;
+	int			 count;
+} MutatorState;
+
+static void
+initMutator(MutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+		if (i >= n)
+			break;
+	}
+}
+
+static void
+swap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+static bool
+getNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+	swap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+	while (l < r)
+		swap(a, l++, r--);
+
+
+	return true;
+}
+
+static List*
+doMutator(MutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* the first "set" returned is the original order */
+	if (state->count == 1)
+		return state->elemsList;
+
+	if (getNextSet(state->positions, state->mutatorNColumns) == false)
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void*) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+typedef struct {
+	Cost cost;
+	PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+/*
+ * Order the tail of the group pathkeys list by decreasing number of distinct
+ * values, which speeds up sorting. Returns newly allocated lists; the old ones
+ * stay untouched. n_preordered gives the head length whose order is preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	MutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+	int i;
+
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Try to match the ORDER BY pathkeys, in the hope that one combined sort
+	 * is cheaper than two separate sorts
+	 */
+	if (debug_group_by_match_order_by &&
+		n_preordered == 0 && root->sort_pathkeys)
+	{
+		bool _save_debug_group_by_reorder_by_pathkeys =
+			debug_group_by_reorder_by_pathkeys;  /* DEBUG ONLY, to be removed */
+
+		debug_group_by_reorder_by_pathkeys = true;
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
+		debug_group_by_reorder_by_pathkeys = _save_debug_group_by_reorder_by_pathkeys;
+
+		if (list_length(*group_pathkeys) - n_preordered < 2)
+			return; /* nothing to do */
+	}
+
+	/*
+	 * Try all permutations of at most 4 cheapest pathkeys.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		/*
+		 * Sort the remaining pathkeys cheapest first.
+		 */
+		PathkeySortCost *costs = palloc(sizeof(*costs) * nFreeKeys);
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/* Construct the sorted list. First, the preordered pathkeys. */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		/* The rest, ordered by increasing cost */
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() may free the list's cells, so copy the
+		 * original list to keep it unmodified; the copy can be freed safely.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	initMutator(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = doMutator(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/*
+	 * Reorder the clause list to match the new pathkey order
+	 */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2171,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered, so we do not care about the actual order
+ * of the pathkeys here
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2218,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2256,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
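[Reviewer aside, not part of the patch: getNextSet() above is the standard next-lexicographic-permutation step (essentially Knuth's Algorithm L, the same operation as C++'s std::next_permutation). A standalone sketch, with names of my own choosing, that may make the swap/reverse dance easier to review:]

```c
#include <assert.h>
#include <stdbool.h>

/* Exchange a[i] and a[j]. */
static void
swap_ints(int *a, int i, int j)
{
	int		tmp = a[i];

	a[i] = a[j];
	a[j] = tmp;
}

/*
 * Advance a[0..n-1] to the next permutation in lexicographic order;
 * return false once a[] holds the last (fully descending) permutation.
 */
static bool
next_permutation(int *a, int n)
{
	int		j = n - 2;
	int		k,
			l,
			r;

	/* find the rightmost position that can still be increased */
	while (j >= 0 && a[j] >= a[j + 1])
		j--;
	if (j < 0)
		return false;			/* fully descending: no next permutation */

	/* swap it with the rightmost suffix element that is larger */
	k = n - 1;
	while (a[j] >= a[k])
		k--;
	swap_ints(a, j, k);

	/* the suffix is descending; reverse it to make it ascending */
	l = j + 1;
	r = n - 1;
	while (l < r)
		swap_ints(a, l++, r--);

	return true;
}
```

[Note that the `k >= 0 &&` guard in the patch's inner loop is redundant: since a[j] < a[j + 1], the scan is guaranteed to stop at some k >= j + 1.]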
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 545b56bcaf..6e82d5ddac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6517,21 +6517,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups = 0;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Grouping sets prevent group-pathkey reordering; just check
+				 * whether the path's pathkeys match the group pathkeys as-is
+				 */
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
+												  path->pathkeys,
+												  &presorted_keys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
+
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6551,14 +6579,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
 				}
-				else if (parse->groupClause)
+				else if (group_clauses)
 				{
 					/*
 					 * We have GROUP BY without aggregation or grouping sets.
@@ -6568,7 +6596,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6664,11 +6692,18 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
 				bool		is_sorted;
-				int			presorted_keys;
+				int			presorted_keys = 0;
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
 														path->pathkeys,
 														&presorted_keys);
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
@@ -6678,10 +6713,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6691,9 +6731,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6702,7 +6742,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 
@@ -6959,18 +6999,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys)) ||
+				pathkeys_contained_in(group_pathkeys, path->pathkeys);
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6978,9 +7032,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6989,7 +7043,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -7062,21 +7116,37 @@ create_partial_grouping_paths(PlannerInfo *root,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+			is_sorted = pathkeys_count_contained_in(group_pathkeys,
 													path->pathkeys,
 													&presorted_keys);
 
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
+
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -7084,9 +7154,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -7095,7 +7165,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
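[Reviewer aside: the n_preordered_groups logic repeated in these planner.c hunks boils down to counting how long a prefix of the path's pathkeys can be absorbed by the order-insensitive GROUP BY key set. A toy model with strings in place of PathKey pointers, names purely illustrative:]

```c
#include <assert.h>
#include <stdbool.h>
#include <string.h>

/*
 * Toy model of group_keys_reorder_by_pathkeys(): count how many leading
 * path keys are members of the GROUP BY key set.  That prefix comes "for
 * free" from the input ordering; only the remaining group keys still need
 * sorting, and those are free to be reordered for cheapness.
 */
static int
count_preordered(const char **path_keys, int npath,
				 const char **group_keys, int ngroup)
{
	int		n = 0;

	for (int i = 0; i < npath; i++)
	{
		bool	found = false;

		for (int j = 0; j < ngroup; j++)
		{
			if (strcmp(path_keys[i], group_keys[j]) == 0)
			{
				found = true;
				break;
			}
		}
		if (!found)
			break;				/* the useful prefix ends here */
		n++;
	}
	return n;
}
```

[So with an index on (p, v) and GROUP BY v, p, c, both p and v are preordered and only c needs sorting, matching the regression test plans below.]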
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 69b83071cf..0bfb638abc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 52314d3aa1..80fc597630 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3292,7 +3292,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding columns one at a time
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3360,11 +3363,20 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/*
 	 * We don't ever want to return an estimate of zero groups, as that tends
@@ -3391,7 +3403,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3400,6 +3412,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* these exprs were already processed on a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3461,7 +3481,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3478,6 +3502,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
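[Reviewer aside: the reason this patch needs per-column group counts at all is point 1 of the cover letter: if the leading sort column is (nearly) unique, the comparator almost never has to look at later columns. That asymmetry is easy to demonstrate in isolation (toy code, not from the patch):]

```c
#include <assert.h>
#include <stdlib.h>

typedef struct
{
	int		a;				/* first sort column */
	int		b;				/* second sort column */
}			Row;

/* counts how often a tie on the first column forces a second-column compare */
static long second_col_compares = 0;

static int
row_cmp(const void *pa, const void *pb)
{
	const Row  *x = (const Row *) pa;
	const Row  *y = (const Row *) pb;

	if (x->a != y->a)
		return (x->a > y->a) - (x->a < y->a);

	second_col_compares++;		/* only reached on first-column ties */
	return (x->b > y->b) - (x->b < y->b);
}
```

[With a unique leading column the counter stays at zero no matter how qsort is implemented; with a constant leading column every comparison pays for both columns. That is exactly the per-column asymmetry cost_sort_estimate() has to price in.]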
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 3fd1a5fbe2..c6d8c37d74 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2049,6 +2049,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match available pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables matching the GROUP BY order to the ORDER BY clause."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables searching for the cheapest order of GROUP BY columns."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 1be93be098..baf6ee79b4 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -110,7 +110,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 035d3e1206..a43898c929 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -196,6 +196,19 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index f9be539602..03e25011b9 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -198,6 +198,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
 
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset);
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					List **cache_varinfos, int prevNExprs);
 
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 477fd1205c..4c420bca4e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1193,7 +1193,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1216,10 +1217,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2401,6 +2400,237 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 68ca321163..859db97ca4 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1437,7 +1437,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 5c7528c029..2e9617351d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1932,8 +1932,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -1967,8 +1967,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4534,18 +4534,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6213,44 +6215,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..a08a3825ff 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 0057f41caa..e3fdfb3e31 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
@@ -1339,7 +1328,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1483,7 +1472,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f5..63b52c065f 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1291,24 +1291,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1327,24 +1325,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 54f5cf7ecc..e694cc8875 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -991,6 +991,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 81429164d4..40e5d30f84 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
#54Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Ibrar Ahmed (#53)
Re: POC: GROUP BY optimization

> Regression (stats_ext) is failing because you forgot to drop the table
> created in a test case (aggregates). It's a minor change, so the attached
> patch fixes that issue.
>
> https://cirrus-ci.com/task/6704792446697472

Thank you very much!

--
Best regards,
Pavel Borisov

Postgres Professional: http://www.postgrespro.com

#55Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Pavel Borisov (#54)
2 attachment(s)
Re: POC: GROUP BY optimization

Hi,

I take a look at the patch today. Attached is the v12 and a separate
patch with some comment tweaks and review comments.

1) I see the patch results in some plan changes in postgres_fdw. I
assume it's somehow related to the sort costing changes, but I find it a
bit suspicious. Why should the plan change from this:

  Foreign Scan
    Remote SQL: ... ORDER BY ... OFFSET 100 LIMIT 10;

to

  Limit
    ->  Foreign Scan
          Remote SQL: ... ORDER BY ...

But even if this is due to changes to order by costing, postponing the
limit seems like a net loss - the sort happens before the limit, and by
postponing the limit after foreign scan we need to transfer 100 more
rows. So what's causing this?

The difference in plan cost seems fairly substantial (old: 196.52, new:
241.94). But maybe that's OK and expected.

2) I wonder how much more expensive (in terms of CPU) is the new sort
costing code. It's iterative, and it's calling functions to calculate
number of groups etc. I wonder what's the impact on simple queries?

3) It's not clear to me why we need "fake var" protection? Why would the
estimate_num_groups() fail for that?

4) In one of the places a comment says DEFAULT_NUM_DISTINCT is not used
because we're dealing with multiple columns. That makes sense, but maybe
we should use DEFAULT_NUM_DISTINCT at least to limit the range. That is,
with N columns we should restrict the nGroups estimate by:

  min = DEFAULT_NUM_DISTINCT
  max = Min(pow(DEFAULT_NUM_DISTINCT, ncolumns), tuples)

Also, it's not clear what's the logic behind the formula:

  nGroups = ceil(2.0 + sqrt(tuples) *
                 list_length(pathkeyExprs) / list_length(pathkeys));

A comment explaining that would be helpful.
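To make the suggestion in 4) concrete, here is a minimal Python sketch of the
fallback estimate with the proposed clamping applied. This is an illustration
only, not the patch's C code: the function name is mine, DEFAULT_NUM_DISTINCT
is PostgreSQL's usual fallback value of 200, and I additionally cap the lower
bound at the table size so tiny tables don't get an estimate larger than their
row count.

```python
import math

DEFAULT_NUM_DISTINCT = 200  # PostgreSQL's fallback ndistinct estimate


def estimate_ngroups(tuples, n_exprs, n_pathkeys):
    """Mirror of the patch's fallback formula, clamped as suggested above:
    keep the estimate between DEFAULT_NUM_DISTINCT and
    Min(pow(DEFAULT_NUM_DISTINCT, ncolumns), tuples)."""
    ngroups = math.ceil(2.0 + math.sqrt(tuples) * n_exprs / n_pathkeys)
    lo = min(DEFAULT_NUM_DISTINCT, tuples)   # don't exceed tiny tables
    hi = min(DEFAULT_NUM_DISTINCT ** n_exprs, tuples)
    return min(max(ngroups, lo), hi)
```

For a 10000-row table and 2 of 4 pathkeys, the raw formula gives 52, which the
clamp pulls up to 200; for larger inputs the raw estimate passes through
unchanged.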

5) The compute_cpu_sort_cost comment includes two references (in a quite
mixed-up way), and then there's another reference in the code. I suggest
we list all of them in the function comment.

6) There's a bunch of magic values (various multipliers etc.). It's not
clear which values are meant to be equal, etc. Let's introduce nicer
constants with reasonable names.

7) The comment at get_cheapest_group_keys_order is a bit misleading,
because it talks about "uniqueness" - but that's not what we're looking
at here (I mean, is the column unique or not). We're looking at number
of distinct values in the column, which is a different thing. Also, it'd
be good to roughly explain what get_cheapest_group_keys_order does - how
it calculates the order (permutations of up to 4 values, etc.).
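For reference, the permutation search that get_cheapest_group_keys_order
performs can be sketched in Python as below. This is a stand-in, not the
patch's code: the cost callback here substitutes for the planner's
compute_cpu_sort_cost(), and the 4-key limit matches the behavior described
above (keys beyond the first four keep their original order).

```python
from itertools import permutations


def cheapest_group_keys_order(keys, sort_cost, max_perm_keys=4):
    """Try all permutations of (up to) the first max_perm_keys grouping keys
    and return the ordering with the lowest estimated sort cost.
    sort_cost is a stand-in for the planner's compute_cpu_sort_cost()."""
    head, tail = keys[:max_perm_keys], keys[max_perm_keys:]
    return min((list(p) + tail for p in permutations(head)), key=sort_cost)
```

With a toy cost function that penalizes putting low-ndistinct columns first,
the search moves the high-cardinality column to the front, which is exactly
the reordering discussed in this thread.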

8) The comment at compute_cpu_sort_cost should also explain basics of
the algorithm. I tried doing something like that, but I'm not sure I got
all the details right and it probably needs further improvements.

9) The main concern I have is still about the changes in planner.c, and
I think I've already shared it before. The code takes the grouping
pathkeys, and just reshuffles them to what it believes is a better order
(cheaper, ...). That is, there's on input pathkey list, and it gets
transformed into another pathkey list. The problem is that even if this
optimizes the sort, it may work against some optimization in the upper
part of the plan.

Imagine a query that does something like this:

  SELECT a, b, count(*) FROM (
      SELECT DISTINCT a, b, c, d FROM x
  ) GROUP BY a, b;

Now, from the costing perspective, it may be cheaper to do the inner
grouping by "c, d, a, b" for example. But that works directly against
the second grouping, which would benefit from having the input sorted by
"a, b". How exactly this behaves depends on the number of distinct
values in the columns, how expensive the comparisons are for each
column, and so on. But you get the idea.

I admit I haven't managed to construct a reasonable query that'd be
significantly harmed by this, but I haven't spent a lot of time on it.

I'm pretty sure I used this trick in the past, when doing aggregations
on large data sets, where it was much better to "pipe" the data through
multiple GroupAggregate nodes.

For this reason I think the code generating paths should work a bit like
get_useful_pathkeys_for_relation and generate_useful_gather_paths.

* group_keys_reorder_by_pathkeys should not produce just one list of
pathkeys, but multiple interesting options. At the moment it should
produce at least the input grouping pathkeys (as specified in the
query), and the "optimal" pathkeys (by lowest cost).

* the places calling group_keys_reorder_by_pathkeys should loop on the
result, and generate separate path for each option.

I'd guess in the future we'll "peek forward" in the plan and see if
there are other interesting pathkeys (that's the expectation for
get_useful_pathkeys_for_relation).
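Concretely, the proposal is for group_keys_reorder_by_pathkeys to hand back a
small list of candidate orderings rather than a single one, with the caller
generating one path per candidate. A hypothetical Python sketch (function name
mine; the real code would deal in PathKey lists, not strings):

```python
def useful_group_key_orders(query_order, cheapest_order):
    """Return the interesting grouping-key orderings to generate paths for:
    at minimum the order written in the query and the cost-optimal order,
    deduplicated.  A future version could also peek at upper-plan pathkeys,
    as get_useful_pathkeys_for_relation does."""
    seen, result = set(), []
    for order in (query_order, cheapest_order):
        key = tuple(order)
        if key not in seen:
            seen.add(key)
            result.append(order)
    return result
```

The point is that both orderings become paths, so the ordinary cost comparison
(including any benefit to upper plan nodes) decides between them, instead of
the reordering being forced unconditionally.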

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

0001-v12-20210309.patch (text/x-patch; charset=UTF-8)
From 0d8a7ddc441a3b26a498f6d3d4805fe5d305ebfc Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 9 Mar 2021 18:44:23 +0100
Subject: [PATCH 1/2] v12

---
 .../postgres_fdw/expected/postgres_fdw.out    | 199 +++++++---
 src/backend/optimizer/path/costsize.c         | 333 +++++++++++++++--
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 347 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 124 +++++--
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  35 +-
 src/backend/utils/misc/guc.c                  |  29 ++
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  13 +
 src/include/utils/selfuncs.h                  |   3 +
 src/test/modules/unsafe_tests/Makefile        |   2 +-
 src/test/regress/expected/aggregates.out      | 240 +++++++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 ++-
 .../regress/expected/partition_aggregate.out  | 136 ++++---
 src/test/regress/expected/partition_join.out  |  79 ++--
 src/test/regress/expected/union.out           |  60 ++-
 src/test/regress/sql/aggregates.sql           |  99 +++++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 20 files changed, 1472 insertions(+), 301 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0649b6b81c..5209d89b32 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1077,13 +1077,15 @@ ANALYZE ft5;
 -- join two tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                       QUERY PLAN                                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                        QUERY PLAN                                                                                        
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
  c1  | c1  
@@ -1103,13 +1105,15 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join three tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
-                                                                                                                                   QUERY PLAN                                                                                                                                    
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                     QUERY PLAN                                                                                                                     
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c2, t3.c3, t1.c3
-   Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
-   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c2, t3.c3, t1.c3
+         Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
+         Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
  c1 | c2 |   c3   
@@ -1716,13 +1720,33 @@ ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
 -- tests whole-row reference for row marks
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
  c1  | c1  
@@ -1741,13 +1765,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
-                                                                                                                                                                                                                                    QUERY PLAN                                                                                                                                                                                                                                    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                        QUERY PLAN                                                                                                                                                                                                                        
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1 FOR UPDATE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
  c1  | c1  
@@ -1767,13 +1811,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join two tables with FOR SHARE clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                           
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                               
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
  c1  | c1  
@@ -1792,13 +1856,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
-                                                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                                                   
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                       QUERY PLAN                                                                                                                                                                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1 FOR SHARE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
  c1  | c1  
@@ -1852,13 +1936,15 @@ WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t
 -- ctid with whole-row reference
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                                                                                                                                  QUERY PLAN                                                                                                                                                                                                                   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                    
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 -- SEMI JOIN, not pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
@@ -1929,13 +2015,15 @@ SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2
 -- CROSS JOIN can be pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                            QUERY PLAN                                                                             
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
  c1 | c1  
@@ -2639,16 +2727,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a25b674a19..1639258aaf 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1751,6 +1751,289 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0; those would make the estimate_num_groups() call fail
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width.  The idea: longer values are more expensive to compare.  The return
+ * value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stats for the corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, so use an estimate based on the type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in Postgres is passed as a Datum, so no operation on a value
+	 * can be cheaper than an operation on a Datum
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of comparison is likely not directly proportional to argument
+	 * width: the arguments may differ in width (we only know the column
+	 * average), and the result is often decided by the first few bytes.
+	 * So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys.  In this case, it falls back to a
+ * simple comparison cost estimate.
+ *
+ * Estimation algorithm is based on ideas from course Algorithms,
+ * Robert Sedgewick, Kevin Wayne, https://algs4.cs.princeton.edu/home/ and paper
+ * "Quicksort Is Optimal For Many Equal Keys", Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017.
+ *
+ * In the terms of those works, let N be the number of tuples and Xi the
+ * number of tuples with key Ki; then the estimate is:
+ * log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ * In our case all Xi are equal, because we have no estimate of the group
+ * sizes, only of the number of groups.  The formula then becomes
+ * N * log(NumberOfGroups).  Next, to estimate a multi-column sort correctly
+ * we compute each column separately: let k be a column number and Gk the
+ * number of groups defined by the first k columns; then the cost is
+ * N * sum( Fk * log(Gk) )
+ * where Fk is the comparison function cost (including width) for column k.
+ */
+
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.;
+	List		*cache_varinfos = NIL;
+
+	/* fall back if the pathkeys are unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume that equivalence members don't differ much in cost, so
+		 * take just the first member for the estimate.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take the actual width cost into account */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* remember whether we have a fake Var in the pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Don't call estimate_num_groups() with a fake Var.  Note that
+		 * pathkeyExprs contains only the previous columns.
+		 */
+		if (!has_fake_var)
+			/*
+			 * Incrementally estimate the number of groups, reusing the
+			 * result from the previous step.
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT, because it applies to a single
+			 * column, while here we estimate the number of groups over a set
+			 * of columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) *
+				list_length(pathkeyExprs) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison, but are still
+		 * checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				if (tuplesPerPrevGroup < output_tuples)
+					/* comparing only inside output_tuples */
+					correctedNGroups =
+						ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups));
+				else
+					/* two groups - in output and out */
+					correctedNGroups = 2.0;
+			}
+			else
+				correctedNGroups = nGroups;
+
+			if (correctedNGroups <= 1.0)
+				correctedNGroups = 2.0;
+			else
+				correctedNGroups = ceil(correctedNGroups);
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but we currently have no
+		 * way to determine that, so add a multiplier to get closer to the
+		 * worst case.
+		 */
+		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+
+		/*
+		 * We can skip all following columns in the cost estimate, because we
+		 * believe the tuples are already unique over the set of previous
+		 * columns.
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1).  It matters for small numbers
+	 * of tuples, roughly below 1e4.  It could be folded in as an extra
+	 * multiplier under the logarithm, but that requires a more complicated
+	 * formula accounting for the number of unique tuples, and it isn't clear
+	 * how to combine such a multiplier with groups.  Estimate it as 10
+	 * cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * cost_sort_estimate
+ *		Simple wrapper to estimate the CPU cost of sorting by the given
+ *		pathkeys, used to choose the cheapest sort order.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1767,7 +2050,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1786,9 +2069,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; any "input cost" should be
+ * added by the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1805,9 +2090,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1831,12 +2113,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1852,18 +2132,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1896,8 +2175,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1981,7 +2260,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1989,7 +2268,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -1998,7 +2277,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2038,7 +2317,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2136,7 +2415,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2204,7 +2483,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 0188c1e9a1..9e3e02a0a8 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -680,7 +680,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That can happen if
+				 * the EquivalenceClass was constructed from a WHERE clause,
+				 * i.e. it has no target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index bd9a176d7d..7beb32488a 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,312 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * Reorder the GROUP BY pathkeys and clauses to match the order of the given
+ * pathkeys.  The function builds new lists (the original GROUP BY lists stay
+ * untouched) and returns the number of matched pathkeys.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List		*new_group_pathkeys= NIL,
+				*new_group_clauses = NIL;
+	ListCell	*key;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * For each pathkey, try to find the corresponding GROUP BY pathkey and
+	 * clause.
+	 */
+	foreach(key, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(key);
+		SortGroupClause	*sgc;
+
+		/*
+		 * Pathkeys are canonical, i.e. they share the same allocated struct,
+		 * so pointer equality is enough.
+		 */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	n = list_length(new_group_pathkeys);
+
+	/*
+	 * Just append the rest of pathkeys and clauses
+	 */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+												 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+typedef struct MutatorState {
+	List		*elemsList;
+	ListCell	**elemCells;
+	void		**elems;
+	int			*positions;
+	int			 mutatorNColumns;
+	int			 count;
+} MutatorState;
+
+static void
+initMutator(MutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+		if (i >= n)
+			break;
+	}
+}
+
+static void
+swap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+static bool
+getNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+	swap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+	while (l < r)
+		swap(a, l++, r--);
+
+
+}
+
+static List*
+doMutator(MutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first set is original set */
+	if (state->count == 1)
+		return state->elemsList;
+
+	if (getNextSet(state->positions, state->mutatorNColumns) == false)
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[state->positions[i] - 1];
+
+	return state->elemsList;
+}
+
+typedef struct {
+	Cost cost;
+	PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * Order the tail of the group-pathkeys list by descending uniqueness to speed
+ * up sorting.  Returns newly allocated lists; the old ones stay untouched.
+ * n_preordered gives the length of the list head whose order must be
+ * preserved.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	MutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+	int i;
+
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return; /* nothing to do */
+
+	/*
+	 * Try to match the ORDER BY pathkeys, in the hope that one sort is
+	 * cheaper than two.
+	 */
+	if (debug_group_by_match_order_by &&
+		n_preordered == 0 && root->sort_pathkeys)
+	{
+		bool _save_debug_group_by_reorder_by_pathkeys =
+			debug_group_by_reorder_by_pathkeys;  /* DEBUG ONLY, to be removed */
+
+		debug_group_by_reorder_by_pathkeys = true;
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  group_pathkeys,
+													  group_clauses);
+		debug_group_by_reorder_by_pathkeys = _save_debug_group_by_reorder_by_pathkeys;
+
+		if (list_length(*group_pathkeys) - n_preordered < 2)
+			return; /* nothing to do */
+	}
+
+	/*
+	 * Try all permutations of at most the 4 cheapest pathkeys.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		/*
+		 * Sort the remaining pathkeys cheapest first.
+		 */
+		PathkeySortCost *costs = palloc(sizeof(*costs) * nFreeKeys);
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/* Construct the sorted list. First, the preordered pathkeys. */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		/* The rest, ordered by increasing cost */
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() can free the list cells themselves, so copy
+		 * the original list to a new one that can be freed safely later
+		 * without modifying the original.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	if (!debug_cheapest_group_by)
+		return;
+
+	initMutator(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = doMutator(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/*
+	 * Reorder the clauses to match the order of the pathkeys.
+	 */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2171,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		an explicit sort)
+ *
+ * Group pathkeys can be reordered, so we don't care about the actual order of
+ * the pathkeys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	if (root->group_pathkeys == NIL)
+		return 0;				/* no special ordering requested */
+
+	if (pathkeys == NIL)
+		return 0;				/* unordered path */
+
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2218,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2256,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 545b56bcaf..6e82d5ddac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6517,21 +6517,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups = 0;
+
+			if (parse->groupingSets)
+			{
+				/*
+				 * Prevent group-pathkey reordering; just check whether the
+				 * path's pathkeys and the group pathkeys are in the same
+				 * order.
+				 */
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
+												  path->pathkeys,
+												  &presorted_keys);
+			}
+			else
+			{
+				n_preordered_groups =
+						group_keys_reorder_by_pathkeys(path->pathkeys,
+													   &group_pathkeys,
+													   &group_clauses);
+				is_sorted = (n_preordered_groups == list_length(group_pathkeys));
+			}
 
 			if (path == cheapest_path || is_sorted)
 			{
 				/* Sort the cheapest-total path if it isn't already sorted */
 				if (!is_sorted)
+				{
+					if (!parse->groupingSets)
+						get_cheapest_group_keys_order(root,
+													  path->rows,
+													  &group_pathkeys,
+													  &group_clauses,
+													  n_preordered_groups);
+
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6551,14 +6579,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
 				}
-				else if (parse->groupClause)
+				else if (group_clauses)
 				{
 					/*
 					 * We have GROUP BY without aggregation or grouping sets.
@@ -6568,7 +6596,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6664,11 +6692,18 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
 				bool		is_sorted;
-				int			presorted_keys;
+				int			presorted_keys = 0;
+				List	   *group_pathkeys = root->group_pathkeys,
+						   *group_clauses = parse->groupClause;
+				int			n_preordered_groups;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+				is_sorted = pathkeys_count_contained_in(group_pathkeys,
 														path->pathkeys,
 														&presorted_keys);
+				n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																	 &group_pathkeys,
+																	 &group_clauses);
+				is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
 
 				/*
 				 * Insert a Sort node, if required.  But there's no point in
@@ -6678,10 +6713,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					if (path != partially_grouped_rel->cheapest_total_path)
 						continue;
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
 				}
 
@@ -6691,9 +6731,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6702,7 +6742,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   havingQual,
 											   dNumGroups));
 
@@ -6959,18 +6999,32 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			Path	   *path = (Path *) lfirst(lc);
 			bool		is_sorted;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
+
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = (n_preordered_groups == list_length(group_pathkeys)) ||
+				pathkeys_contained_in(group_pathkeys, path->pathkeys);
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
 			if (path == cheapest_total_path || is_sorted)
 			{
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_path(partially_grouped_rel, (Path *)
@@ -6978,9 +7032,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 											 partially_grouped_rel,
 											 path,
 											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 group_clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
+											 group_clauses,
 											 NIL,
 											 agg_partial_costs,
 											 dNumPartialGroups));
@@ -6989,7 +7043,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 							 create_group_path(root,
 											   partially_grouped_rel,
 											   path,
-											   parse->groupClause,
+											   group_clauses,
 											   NIL,
 											   dNumPartialGroups));
 			}
@@ -7062,21 +7116,37 @@ create_partial_grouping_paths(PlannerInfo *root,
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
 			bool		is_sorted;
-			int			presorted_keys;
+			int			presorted_keys = 0;
+			List	   *group_pathkeys = root->group_pathkeys,
+					   *group_clauses = parse->groupClause;
+			int			n_preordered_groups;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+			is_sorted = pathkeys_count_contained_in(group_pathkeys,
 													path->pathkeys,
 													&presorted_keys);
 
+			n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys,
+																 &group_pathkeys,
+																 &group_clauses);
+			is_sorted = is_sorted || (n_preordered_groups == list_length(group_pathkeys));
+
 			if (path == cheapest_partial_path || is_sorted)
 			{
+
 				/* Sort the cheapest partial path, if it isn't already */
 				if (!is_sorted)
+				{
+					get_cheapest_group_keys_order(root,
+												  path->rows,
+												  &group_pathkeys,
+												  &group_clauses,
+												  n_preordered_groups);
 					path = (Path *) create_sort_path(root,
 													 partially_grouped_rel,
 													 path,
-													 root->group_pathkeys,
+													 group_pathkeys,
 													 -1.0);
+				}
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -7084,9 +7154,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 group_clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 group_clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -7095,7 +7165,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   group_clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 69b83071cf..0bfb638abc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 52314d3aa1..80fc597630 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3292,7 +3292,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate the number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding columns one by one.
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3360,11 +3363,20 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/*
 	 * We don't ever want to return an estimate of zero groups, as that tends
@@ -3391,7 +3403,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3400,6 +3412,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* these columns were already processed on a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3461,7 +3481,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3478,6 +3502,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 3fd1a5fbe2..c6d8c37d74 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2049,6 +2049,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match input pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match the ORDER BY clause."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables searching for the cheapest order of GROUP BY columns."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 1be93be098..baf6ee79b4 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -110,7 +110,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 035d3e1206..a43898c929 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -196,6 +196,19 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+										  double nrows,
+										  List **group_pathkeys,
+										  List **group_clauses,
+										  int	n_preordered);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index f9be539602..03e25011b9 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -198,6 +198,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
 
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset);
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					List **cache_varinfos, int prevNExprs);
 
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
diff --git a/src/test/modules/unsafe_tests/Makefile b/src/test/modules/unsafe_tests/Makefile
index 3ecf5fcfc5..2cf710eb2c 100644
--- a/src/test/modules/unsafe_tests/Makefile
+++ b/src/test/modules/unsafe_tests/Makefile
@@ -1,6 +1,6 @@
 # src/test/modules/unsafe_tests/Makefile
 
-REGRESS = rolenames alter_system_table
+REGRESS = alter_system_table
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2c818d9253..a970ef45a1 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1200,7 +1200,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1223,10 +1224,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2408,6 +2407,237 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Sort
+         Sort Key: p, v, c
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Sort
+         Sort Key: p, v, c, d
+         ->  Index Scan using btg_p_v_idx on btg
+(5 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 68ca321163..859db97ca4 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1437,7 +1437,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 5c7528c029..2e9617351d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1932,8 +1932,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -1967,8 +1967,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4534,18 +4534,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6213,44 +6215,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..a08a3825ff 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 0057f41caa..e3fdfb3e31 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
@@ -1339,7 +1328,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1483,7 +1472,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c, t2.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f5..63b52c065f 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1291,24 +1291,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1327,24 +1325,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index f9579af19a..cb6434b096 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -999,6 +999,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reordering columns to match an index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 81429164d4..40e5d30f84 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.29.2

0002-review-20210309.patch (text/x-patch)
From 9a43cf0fb74e242f56c9d290c5544743b4b7375d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 9 Mar 2021 21:09:14 +0100
Subject: [PATCH 2/2] review

---
 src/backend/optimizer/path/costsize.c | 77 ++++++++++++++++++++-------
 src/backend/optimizer/path/pathkeys.c | 14 ++++-
 2 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1639258aaf..f55a4f20e5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1755,6 +1755,8 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * is_fake_var
  *		Workaround for generate_append_tlist() which generates fake Vars with
  *		varno == 0, which would cause an estimate_num_groups() call to fail
+ *
+ * XXX Ummm, why would estimate_num_group fail with this?
  */
 static bool
 is_fake_var(Expr *expr)
@@ -1828,27 +1830,53 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  * compute_cpu_sort_cost
  *		compute CPU cost of sort (i.e. in-memory)
  *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from two sources:
+ *
+ * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/]
+ *
+ * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * tuples with key Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * In our case all Xi are the same because we don't have any estimate of
+ * group sizes; we only know the estimated number of groups (distinct
+ * values). In that case the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that the number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparison cost for column k; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
  * NOTE: some callers currently pass NIL for pathkeys because they
  * can't conveniently supply the sort keys.  In this case, it will fallback to
  * simple comparison cost estimate.
- *
- * Estimation algorithm is based on ideas from course Algorithms,
- * Robert Sedgewick, Kevin Wayne, https://algs4.cs.princeton.edu/home/ and paper
- * "Quicksort Is Optimal For Many Equal Keys", Sebastian Wild,
- * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017.
- *
- * In term of that papers, let N - number of tuples, Xi - number of tuples with
- * key Ki, then estimation is:
- * log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
- * In our case all Xi are the same because now we don't have an estimation of
- * group sizes, we have only estimation of number of groups. In this case,
- * formula becomes: N * log(NumberOfGroups). Next, to support correct estimation
- * of multi-column sort we need separately compute each column, so, let k is a
- * column number, Gk - number of groups  defined by k columns:
- * N * sum( Fk * log(Gk) )
- * Fk is a function costs (including width) for k columns.
  */
-
 static Cost
 compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 					  Cost comparison_cost, double tuples, double output_tuples,
@@ -1862,7 +1890,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	bool		has_fake_var = false;
 	int			i = 0;
 	Oid			prev_datatype = InvalidOid;
-	Cost		funcCost = 0.;
+	Cost		funcCost = 0.0;
 	List		*cache_varinfos = NIL;
 
 	/* fallback if pathkeys is unknown */
@@ -1873,6 +1901,10 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 * a total number of tuple comparisons of N log2 K; but the constant
 		 * factor is a bit higher than for quicksort.  Tweak it so that the
 		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
 		 */
 		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
 		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
@@ -1888,7 +1920,6 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	 * - per column comparison function cost
 	 * - we try to compute needed number of comparison per column
 	 */
-
 	foreach(lc, pathkeys)
 	{
 		PathKey				*pathkey = (PathKey*)lfirst(lc);
@@ -1952,6 +1983,11 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 			 * Don't use DEFAULT_NUM_DISTINCT because it used for only one
 			 * column while here we try to estimate number of groups over
 			 * set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
 			 */
 			nGroups = ceil(2.0 + sqrt(tuples) *
 				list_length(pathkeyExprs) / list_length(pathkeys));
@@ -1968,6 +2004,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 			{
 				if (tuplesPerPrevGroup < output_tuples)
 					/* comparing only inside output_tuples */
+					/* XXX why not use the same multiplier (1.5)? */
 					correctedNGroups =
 						ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups));
 				else
@@ -1993,7 +2030,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
 
 		/*
-		 * We could skip all followed columns for cost estimation, because we
+		 * We could skip all following columns for cost estimation, because we
		 * believe that tuples are unique by the set of previous columns
 		 */
 		if (tuplesPerPrevGroup <= 1.0)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 7beb32488a..b092c3e055 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -515,10 +515,19 @@ pathkey_sort_cost_comparator(const void *_a, const void *_b)
 		return 0;
 	return 1;
 }
+
 /*
  * Order the tail of the group pathkeys list by uniqueness, descending; this
  * speeds up sorting. Returns newly allocated lists, old ones stay untouched.
  * n_preordered defines the list head whose order must be preserved.
+ *
+ * XXX But we're not generating this only based on uniqueness (that's a bad
+ * term anyway, because we're using ndistinct estimates, not uniqueness).
+ * We're also using the comparator cost to calculate the expected sort cost,
+ * and optimize that.
+ *
+ * XXX This should explain how we generate the values - all permutations for
+ * up to 4 values, etc.
  */
 void
 get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
@@ -597,8 +606,9 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 	else
 	{
 		/*
-		 * Since v13 list_free() can clean list elements so for original list not to be modified it should be copied to
-		 * a new one which can then be cleaned safely if needed.
+		 * Since v13 list_free() can clean list elements so for original list
+		 * not to be modified it should be copied to a new one which can then
+		 * be cleaned safely if needed.
 		 */
 		new_group_pathkeys = list_copy(*group_pathkeys);
 		nToPermute = nFreeKeys;
-- 
2.29.2

#56Ibrar Ahmed
ibrar.ahmad@gmail.com
In reply to: Tomas Vondra (#55)
Re: POC: GROUP BY optimization

On Wed, Mar 10, 2021 at 4:05 AM Tomas Vondra <tomas.vondra@enterprisedb.com>
wrote:

Hi,

I take a look at the patch today. Attached is the v12 and a separate
patch with some comment tweaks and review comments.

1) I see the patch results in some plan changes in postgres_fdw. I
assume it's somehow related to the sort costing changes, but I find it a
bit suspicious. Why should the plan change from this:

Foreign Scan
Remote SQL: ... ORDER BY ... OFFSET 100 LIMIT 10;

to

Limit
Foreign Scan
Remote SQL: ... ORDER BY ...

But even if this is due to changes to order by costing, postponing the
limit seems like a net loss - the sort happens before the limit, and by
postponing the limit after foreign scan we need to transfer 100 more
rows. So what's causing this?

The difference in plan cost seems fairly substantial (old: 196.52, new:
241.94). But maybe that's OK and expected.

2) I wonder how much more expensive (in terms of CPU) the new sort
costing code is. It's iterative, and it calls functions to calculate the
number of groups etc. I wonder what the impact on simple queries is?

3) It's not clear to me why we need "fake var" protection? Why would the
estimate_num_groups() fail for that?

4) In one of the places a comment says DEFAULT_NUM_DISTINCT is not used
because we're dealing with multiple columns. That makes sense, but maybe
we should use DEFAULT_NUM_DISTINCT at least to limit the range. That is,
with N columns we should restrict the nGroups estimate by:

min = DEFAULT_NUM_DISTINCT
max = Min(pow(DEFAULT_NUM_DISTINCT, ncolumns), tuples)

Also, it's not clear what's the logic behind the formula:

nGroups = ceil(2.0 + sqrt(tuples) *
list_length(pathkeyExprs) / list_length(pathkeys));

A comment explaining that would be helpful.
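To make that concrete, here is a quick sketch (in Python; the function names
are mine, and DEFAULT_NUM_DISTINCT is 200 per selfuncs.h) of what the fallback
formula evaluates to, together with the clamp suggested above:

```python
import math

def fallback_ngroups(tuples, n_exprs, n_pathkeys):
    # Transcription of the fallback in compute_cpu_sort_cost():
    #   nGroups = ceil(2.0 + sqrt(tuples) * list_length(pathkeyExprs)
    #                                     / list_length(pathkeys));
    return math.ceil(2.0 + math.sqrt(tuples) * n_exprs / n_pathkeys)

def clamped_ngroups(tuples, ncolumns, default_num_distinct=200):
    # The clamp suggested above: keep the estimate within
    # [DEFAULT_NUM_DISTINCT, min(DEFAULT_NUM_DISTINCT^ncolumns, tuples)].
    est = fallback_ngroups(tuples, ncolumns, ncolumns)
    lo = default_num_distinct
    hi = min(default_num_distinct ** ncolumns, tuples)
    return max(lo, min(est, hi))

# With 1M tuples and all 3 pathkey expressions usable, the raw formula
# gives ceil(2 + 1000) = 1002 groups; with only 1 of 3 usable, 336.
```

Seeing the actual values makes it clearer why the formula needs a comment:
it grows with sqrt(tuples), which is not obviously the right shape.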

5) The compute_cpu_sort_cost comment includes two references (in a quite
mixed-up way), and then there's another reference in the code. I suggest
we list all of them in the function comment.

6) There's a bunch of magic values (various multipliers etc.). It's not
clear which values are meant to be equal, etc. Let's introduce nicer
constants with reasonable names.

7) The comment at get_cheapest_group_keys_order is a bit misleading,
because it talks about "uniqueness" - but that's not what we're looking
at here (I mean, is the column unique or not). We're looking at number
of distinct values in the column, which is a different thing. Also, it'd
be good to roughly explain what get_cheapest_group_keys_order does - how
it calculates the order (permutations up to 4 values, etc.).

8) The comment at compute_cpu_sort_cost should also explain basics of
the algorithm. I tried doing something like that, but I'm not sure I got
all the details right and it probably needs further improvements.

9) The main concern I have is still about the changes in planner.c, and
I think I've already shared it before. The code takes the grouping
pathkeys, and just reshuffles them to what it believes is a better order
(cheaper, ...). That is, there's on input pathkey list, and it gets
transformed into another pathkey list. The problem is that even if this
optimizes the sort, it may work against some optimization in the upper
part of the plan.

Imagine a query that does something like this:

SELECT a, b, count(*) FROM (
SELECT DISTINCT a, b, c, d FROM x
) GROUP BY a, b;

Now, from the costing perspective, it may be cheaper to do the inner
grouping by "c, d, a, b" for example. But that works directly against
the second grouping, which would benefit from having the input sorted by
"a, b". How exactly this behaves depends on the number of distinct
values in the columns, how expensive the comparisons are for each
column, and so on. But you get the idea.
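To put rough numbers on that, here is a toy sketch of the comparison-count
model (N * sum(Fk * log(Gk)) per the patch comments, simplified); the
per-column ndistinct and cost values are made up for illustration:

```python
import math

def sort_cost(n_tuples, columns):
    """Simplified reading of the patch's model: N * sum(Fk * log2(Gk)),
    where Gk is the number of groups formed by the first k columns and
    Fk the per-comparison cost of column k. Once the prefix makes all
    tuples unique, the remaining columns are never compared."""
    cost, groups = 0.0, 1
    for ndistinct, per_cmp_cost in columns:
        groups = min(groups * ndistinct, n_tuples)
        cost += per_cmp_cost * math.log2(groups)
        if groups >= n_tuples:
            break  # later columns skipped, as in compute_cpu_sort_cost
    return n_tuples * cost

N = 1_000_000
a = b = (10, 1.0)    # few distinct values
c = d = (1000, 1.0)  # many distinct values

inner_cdab = sort_cost(N, [c, d, a, b])  # high-cardinality prefix wins locally
inner_abcd = sort_cost(N, [a, b, c, d])  # but this order feeds GROUP BY a, b
```

In this toy model the (c, d, a, b) order needs roughly 30M comparisons versus
47M for (a, b, c, d), yet only the latter leaves the rows usefully ordered for
the outer aggregate - exactly the trade-off a single "optimal" ordering cannot
capture.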

I admit I haven't managed to construct a reasonable query that'd be
significantly harmed by this, but I haven't spent a lot of time on it.

I'm pretty sure I used this trick in the past, when doing aggregations
on large data sets, where it was much better to "pipe" the data through
multiple GroupAggregate nodes.

For this reason I think the code generating paths should work a bit like
get_useful_pathkeys_for_relation and generate_useful_gather_paths.

* group_keys_reorder_by_pathkeys should not produce just one list of
pathkeys, but multiple interesting options. At the moment it should
produce at least the input grouping pathkeys (as specified in the
query), and the "optimal" pathkeys (by lowest cost).

* the places calling group_keys_reorder_by_pathkeys should loop on the
result, and generate separate path for each option.

I'd guess in the future we'll "peek forward" in the plan and see if
there are other interesting pathkeys (that's the expectation for
get_useful_pathkeys_for_relation).

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

The patch does not apply successfully; can you please take a look at that?

http://cfbot.cputube.org/patch_33_1651.log

=== applying patch ./0001-v12-20210309.patch

patching file src/include/utils/selfuncs.h
Hunk #1 FAILED at 198.
1 out of 1 hunk FAILED -- saving rejects to file
src/include/utils/selfuncs.h.rej

Based on @Tomas Vondra's comments, and since the patch does not apply, I am
changing the status to "Waiting On Author".

--

Ibrar Ahmed

#57Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Ibrar Ahmed (#56)
1 attachment(s)
Re: POC: GROUP BY optimization

Hi,

here is an updated version of this patch, with some significant changes.

The main change is that instead of calling get_cheapest_group_keys_order
directly, the planner now calls get_useful_group_keys_orderings and gets
multiple "interesting" pathkey orderings instead of just a single one.
The callers then loop over these orderings and construct paths for all
of them. This idea is similar to get_useful_pathkeys_for_relation()
added by incremental sort.

FWIW this addresses point (9) from my last review - I started with it,
because it was the main thing affecting the overall architecture. The
remaining bits are more "local".

I haven't investigated how expensive those changes are (in terms of
planning overhead), but the number of extra orderings is fairly low, and
I'd expect most of the paths to be eliminated fairly quickly.
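For a sense of the upper bound: per the comments upthread, only up to 4 free
keys are permuted, so the planner looks at no more than 4! = 24 candidate
orders per sort. A sketch of that enumeration (hypothetical helper, not the
actual get_cheapest_group_keys_order code):

```python
from itertools import permutations

def candidate_orderings(preordered, free_keys, limit=4):
    # Keep the presorted prefix fixed, permute at most `limit` of the
    # remaining group keys, and leave any further keys in place.
    head, tail = free_keys[:limit], free_keys[limit:]
    return [preordered + list(p) + tail for p in permutations(head)]

orders = candidate_orderings(['a'], ['b', 'c', 'd'])
# 3 free keys -> 3! = 6 candidates, each keeping the presorted 'a' first
```

Even in the worst case the extra paths are bounded by a small constant, which
supports the expectation that the planning overhead stays modest.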

I've also added / improved a number of comments etc. but I'm sure more
cleanup is needed.

The other comments from the review still apply - I'm particularly
concerned about the (1) point, i.e. plan changes in postgres_fdw. Those
seem to be rather strange (LIMIT not being pushed down in queries
without any grouping). I'd bet this is due to changes in sort costing
and does not seem very desirable.

regards

[1]: /messages/by-id/22c44f98-bfa8-8630-62b5-5155e11eb284@enterprisedb.com

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

group-by-reorder-20210720.patch (text/x-patch)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ed25e7a743..6145b99f38 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1077,13 +1077,15 @@ ANALYZE ft5;
 -- join two tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                       QUERY PLAN                                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                        QUERY PLAN                                                                                        
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
  c1  | c1  
@@ -1103,13 +1105,15 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join three tables
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
-                                                                                                                                   QUERY PLAN                                                                                                                                    
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                     QUERY PLAN                                                                                                                     
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c2, t3.c3, t1.c3
-   Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
-   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c2, t3.c3, t1.c3
+         Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
+         Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
  c1 | c2 |   c3   
@@ -1718,13 +1722,33 @@ ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
 -- tests whole-row reference for row marks
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                                
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
  c1  | c1  
@@ -1743,13 +1767,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
-                                                                                                                                                                                                                                    QUERY PLAN                                                                                                                                                                                                                                    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                        QUERY PLAN                                                                                                                                                                                                                        
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR UPDATE OF r1 FOR UPDATE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
  c1  | c1  
@@ -1769,13 +1813,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 -- join two tables with FOR SHARE clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
-                                                                                                                                                                                                                           QUERY PLAN                                                                                                                                                                                                                           
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                               QUERY PLAN                                                                                                                                                                                                               
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
  c1  | c1  
@@ -1794,13 +1858,33 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
-                                                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                                                   
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                                       QUERY PLAN                                                                                                                                                                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint FOR SHARE OF r1 FOR SHARE OF r2
-(4 rows)
+   ->  LockRows
+         Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+         ->  Foreign Scan
+               Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
+               ->  Result
+                     Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
+                     ->  Sort
+                           Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                           Sort Key: t1.c3, t1.c1
+                           ->  Hash Join
+                                 Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
+                                 Hash Cond: (t1.c1 = t2.c1)
+                                 ->  Foreign Scan on public.ft1 t1
+                                       Output: t1.c1, t1.c3, t1.*
+                                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+                                 ->  Hash
+                                       Output: t2.c1, t2.*
+                                       ->  Foreign Scan on public.ft2 t2
+                                             Output: t2.c1, t2.*
+                                             Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
+(24 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
  c1  | c1  
@@ -1854,13 +1938,15 @@ WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t
 -- ctid with whole-row reference
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                                                                                                                                                  QUERY PLAN                                                                                                                                                                                                                   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                                                                   QUERY PLAN                                                                                                                                                                                                    
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.ctid, t1.*, t2.*, t1.c1, t1.c3
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END, r1."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
+(6 rows)
 
 -- SEMI JOIN, not pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
@@ -1931,13 +2017,15 @@ SELECT t1.c1 FROM ft1 t1 WHERE NOT EXISTS (SELECT 1 FROM ft2 t2 WHERE t1.c1 = t2
 -- CROSS JOIN can be pushed down
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                            QUERY PLAN                                                                             
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: t1.c1, t2.c1
-   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-   Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 100::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: t1.c1, t2.c1
+         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+         Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (TRUE)) ORDER BY r1."C 1" ASC NULLS LAST, r2."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT t1.c1, t2.c1 FROM ft1 t1 CROSS JOIN ft2 t2 ORDER BY t1.c1, t2.c1 OFFSET 100 LIMIT 10;
  c1 | c1  
@@ -2644,16 +2732,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b54cf34a8e..1e74e10cbc 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1754,6 +1754,326 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0; those would cause a call to estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea is that longer values are more expensive to compare. The
+ * return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, so estimate based on the data type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in PostgreSQL is passed as a Datum, so no operation on a
+	 * value can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of a comparison is not directly proportional to the argument
+	 * width, because the compared values may differ in width (we only know
+	 * the average over the column) and the difference can often be determined
+	 * from the first few bytes. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step
+ * we estimate the number of necessary comparisons (based on the number of
+ * distinct groups in the current pathkey prefix and the new pathkey), and the
+ * comparison costs (which are data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from two sources:
+ *
+ * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/]
+ *
+ * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906]
+ *
+ * In the terms of that paper, let N be the number of tuples and Xi the number
+ * of tuples with key Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * In our case all Xi are the same, because we don't have any estimate of the
+ * group sizes - we only have an estimate of the number of groups (distinct
+ * values). In that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that the number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the per-comparison cost for column k; then the total is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: we also consider the column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they can't
+ * conveniently supply the sort keys.  In this case, we fall back to a simple
+ * comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fall back if the pathkeys are unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume the equivalence members aren't very different, so to
+		 * estimate the cost we take just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take the actual column width into account */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* remember if we have a fake Var in the pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Avoid calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the previous columns.
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Incrementally compute the number of groups within the groups from the previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT, because it applies to just one
+			 * column, while here we try to estimate the number of groups
+			 * over a set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) *
+				list_length(pathkeyExprs) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison, but are still
+		 * checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				if (tuplesPerPrevGroup < output_tuples)
+					/* comparisons happen only within output_tuples */
+					/* XXX why not use the same multiplier (1.5)? */
+					correctedNGroups =
+						ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups));
+				else
+					/* two groups - one in the output and one outside it */
+					correctedNGroups = 2.0;
+			}
+			else
+				correctedNGroups = nGroups;
+
+			if (correctedNGroups <= 1.0)
+				correctedNGroups = 2.0;
+			else
+				correctedNGroups = ceil(correctedNGroups);
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but we have no way to
+		 * determine that here, so add a multiplier to get closer to the worst case.
+		 */
+		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+
+		/*
+		 * We can skip all the following columns for cost estimation, because
+		 * we believe the tuples are unique over the set of previous columns.
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles
+	 * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number
+	 * of tuples (see Chapter 8.2 and Theorem 4.1). It matters for low
+	 * numbers of tuples, approximately less than 1e4. It could be
+	 * implemented as an additional multiplier under the logarithm, but that
+	 * formula depends on the number of unique tuples and it isn't clear how
+	 * to combine the multiplier with the groups. Estimate it as 10
+	 * cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper around compute_cpu_sort_cost(), used to estimate the best sort path.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1770,7 +2090,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ *		and the CPU cost is computed by compute_cpu_sort_cost().
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1789,9 +2109,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input. If there is any "input cost",
+ * it should be added by the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1808,9 +2130,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1834,12 +2153,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1855,18 +2172,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1899,8 +2215,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1985,7 +2301,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1993,7 +2309,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2002,7 +2318,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2042,7 +2358,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2140,7 +2456,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2208,7 +2524,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d..899da5b109 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet, it's possible if ec was
+				 * constructed from WHERE clause, ie it doesn't have target
+				 * reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index bd9a176d7d..f3fa743679 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,526 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * The function builds new lists (pathkeys and clauses) and returns them via
+ * the output arguments; the original GROUP BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (!debug_group_by_reorder_by_pathkeys)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+	int s = a[i];
+
+	a[i] = a[j];
+	a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+typedef struct {
+	Cost cost;
+	PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * When a cheaper ordering is found, the function returns true and replaces
+ * the input lists with newly allocated ones. If no reordering is possible
+ * (or needed), it returns false and the lists are left untouched.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we apply a simple cheap heuristic first - we sort the pathkeys by sort
+	 * cost (as if each pathkey was sorted independently) and then check only
+	 * the four cheapest pathkeys. The remaining pathkeys are kept ordered by
+	 * cost.
+	 *
+	 * XXX This is a very simple heuristic, and likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than 4). But
+	 * it ignores how the number of distinct values in each pathkey affects the
+	 * following sorts. It may be better to use "more expensive" pathkey first
+	 * if it has many distinct values, because it then limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is kinda the
+	 * expensive bit we're trying to not do.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() can free the list's cells, so to keep the
+		 * original list unmodified we copy it to a new one, which can then be
+		 * freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly, we
+	 * may pass the current "best" cost to cost_sort_estimate so that it can
+	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could
+	 * return information about the position where it exceeded the cost, and we
+	 * could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better" so we can discard it.
+	 * But we also know that we can discard all ordering with the same prefix,
+	 * because if we're estimating (a,b,c,d) and we exceeded C1 at (a,b) then
+	 * the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in
+ * matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering (we already bailed out for
+	 * grouping sets above).
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the target ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2385,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with an ordering not matching the
+ * grouping keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2447,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2485,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1868c4eff4..bead51727f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6002,24 +6002,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6031,17 +6131,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6049,14 +6149,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6066,79 +6166,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6149,100 +6176,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6445,41 +6497,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6489,6 +6571,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6547,24 +6631,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, not point in building incremental sort */
+				/* no shared prefix, no point in building incremental sort */
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6572,9 +6732,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6583,59 +6743,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
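For readers skimming the planner.c hunks above: each pathlist loop now iterates over the orderings returned by get_useful_group_keys_orderings(), and one of those orderings comes from matching GROUP BY keys against the input path's pathkeys (point 2 in the opening message). A hypothetical Python sketch of that reordering idea follows; the names and logic are illustrative only, and the real C code also orders the remaining keys by estimated cardinality rather than keeping their original order.

```python
def group_keys_reorder_by_pathkeys(pathkeys, group_keys):
    """Return group_keys reordered to share a prefix with pathkeys,
    plus the length of that shared prefix (usable as presorted_keys)."""
    prefix = []
    for pk in pathkeys:
        if pk not in group_keys:
            break
        prefix.append(pk)
    # append the group keys not covered by the prefix, in original order
    rest = [k for k in group_keys if k not in prefix]
    return prefix + rest, len(prefix)

# With an index on (p, v), GROUP BY v, p, c can reuse the index order:
order, n_presorted = group_keys_reorder_by_pathkeys(["p", "v"], ["v", "p", "c"])
```

With two presorted keys, only an incremental sort on the trailing key is needed, which is exactly what the aggregates.out plans below show.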
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0c94cbe767..78ec5d05e6 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0c8c05f6c2..5b326364d1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for
+ *		  the case of adding columns one at a time.
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* was done on previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
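The selfuncs.c change above lets repeated calls skip the first prevNExprs expressions and reuse the cached per-column varinfos, since the reordering code estimates group counts for growing column prefixes. A hypothetical Python sketch of that caching pattern (it simply multiplies invented per-column distinct counts; the real estimator clamps and adjusts for correlation):

```python
def estimate_num_groups_incremental(exprs, ndistinct, cache, prev_nexprs):
    """Estimate group count for exprs, skipping work already done for the
    first prev_nexprs expressions on an earlier call (cached in `cache`)."""
    for j, col in enumerate(exprs):
        if j < prev_nexprs:
            continue  # already folded into the cache by a previous call
        cache.append(ndistinct[col])
    estimate = 1.0
    for d in cache:
        estimate *= d
    return estimate, cache

nd = {"a": 10, "b": 5, "c": 2}
# First call processes both columns; second call only the newly added one.
est1, cache = estimate_num_groups_incremental(["a", "b"], nd, [], 0)
est2, cache = estimate_num_groups_incremental(["a", "b", "c"], nd, cache, 2)
```

This addresses notice 2 from the opening message: without the cache, each prefix would recompute the varinfos for every column from scratch.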
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a2e0f8de7e..28d43b14d8 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2116,6 +2116,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable reordering of GROUP BY keys to match pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable matching GROUP BY order to ORDER BY."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("find the cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
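The debug_enable_cheapest_group_by knob above gates heuristic 1 from the opening message: order GROUP BY columns by descending number of distinct values, so the sort comparator usually resolves a comparison on the first column and never evaluates the rest. A hypothetical Python sketch with invented ndistinct values:

```python
def cheapest_group_by_order(group_keys, ndistinct):
    """Order GROUP BY columns by descending distinct-value count, so the
    sort comparator can usually decide on the first column alone."""
    return sorted(group_keys, key=lambda k: ndistinct[k], reverse=True)

# Roughly the btg test table below: v has 2 distinct values, p ~5000, c ~2500.
nd = {"v": 2, "p": 5000, "c": 2500}
order = cheapest_group_by_order(["v", "p", "c"], nd)
```

This matches the "Group Key: p, c, v" plans in the aggregates.out hunk below; the real cost model also weighs per-column comparison costs, not just ndistinct.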
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f7b009ec43..e0dd3b62b5 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -267,6 +267,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a692bcfb53..1f823016bc 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1061,6 +1061,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 2113bc82de..a3ff71fa9d 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -112,7 +112,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..47540b6c0e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 9dd444e1ff..e20f45e104 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/modules/unsafe_tests/Makefile b/src/test/modules/unsafe_tests/Makefile
index 3ecf5fcfc5..2cf710eb2c 100644
--- a/src/test/modules/unsafe_tests/Makefile
+++ b/src/test/modules/unsafe_tests/Makefile
@@ -1,6 +1,6 @@
 # src/test/modules/unsafe_tests/Makefile
 
-REGRESS = rolenames alter_system_table
+REGRESS = alter_system_table
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 23b112b2af..f8ab04295e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1200,7 +1200,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1223,10 +1224,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2408,6 +2407,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e48..21c429226f 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f3589d0dbb..59e68ac0b7 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4612,18 +4612,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6298,44 +6300,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..a08a3825ff 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 27f7525b3e..100ded51b0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f5..63b52c065f 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1291,24 +1291,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1327,24 +1325,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ed2d6b3bdf..26fe382364 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -999,6 +999,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54..1de163e039 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
#58Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Tomas Vondra (#57)
Re: POC: GROUP BY optimization

On 7/20/21 3:12 PM, Tomas Vondra wrote:

...

The other comments from the review still apply - I'm particularly
concerned about the (1) point, i.e. plan changes in postgres_fdw. Those
seem to be rather strange (LIMIT not being pushed down in queries
without any grouping). I'd bet this is due to changes in sort costing
and does not seem very desirable.

I did look at this a bit closer, and yes - this very much seems like a
costing issue in the patch. The first query in postgres_fdw that changes
switches from a query with LIMIT pushed to remote

QUERY PLAN
------------------------------------------------------------------------
 Foreign Scan  (cost=196.29..196.52 rows=10 width=14)
   Output: t1.c1, t2.c1, t1.c3
   Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
   Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON ... LIMIT ... OFFSET ...
(4 rows)

to the LIMIT executed locally

QUERY PLAN
-------------------------------------------------------------------------
 Limit  (cost=241.72..241.94 rows=10 width=14)
   Output: t1.c1, t2.c1, t1.c3
   ->  Foreign Scan  (cost=239.47..261.97 rows=1000 width=14)
         Output: t1.c1, t2.c1, t1.c3
         Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
         Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN ...
(6 rows)

The FDW code runs explain on both queries - with and without LIMIT
pushed to remote side, to get estimates, and without the patch it gets this:

QUERY PLAN
------------------------------------------------------------------------------
 Sort  (cost=106.97..109.47 rows=1000 width=14)
   Sort Key: r1.c3, r1."C 1"
   ->  Hash Join  (cost=33.50..57.14 rows=1000 width=14)
         Hash Cond: (r1."C 1" = r2."C 1")
         ->  Seq Scan on "T 1" r1  (cost=0.00..21.00 rows=1000 width=10)
         ->  Hash  (cost=21.00..21.00 rows=1000 width=4)
               ->  Seq Scan on "T 1" r2  (cost=0.00..21.00 rows=1000 width=4)
(7 rows)

QUERY PLAN
------------------------------------------------------------------------------------
 Limit  (cost=96.29..96.32 rows=10 width=14)
   ->  Sort  (cost=96.04..98.54 rows=1000 width=14)
         Sort Key: r1.c3, r1."C 1"
         ->  Hash Join  (cost=33.50..57.14 rows=1000 width=14)
               Hash Cond: (r1."C 1" = r2."C 1")
               ->  Seq Scan on "T 1" r1  (cost=0.00..21.00 rows=1000 width=10)
               ->  Hash  (cost=21.00..21.00 rows=1000 width=4)
                     ->  Seq Scan on "T 1" r2  (cost=0.00..21.00 rows=1000 width=4)
(8 rows)

while with the patch it gets this:

QUERY PLAN
------------------------------------------------------------------------------
 Sort  (cost=139.47..141.97 rows=1000 width=14)
   Sort Key: r1.c3, r1."C 1"
   ->  Hash Join  (cost=33.50..57.14 rows=1000 width=14)
         Hash Cond: (r1."C 1" = r2."C 1")
         ->  Seq Scan on "T 1" r1  (cost=0.00..21.00 rows=1000 width=10)
         ->  Hash  (cost=21.00..21.00 rows=1000 width=4)
               ->  Seq Scan on "T 1" r2  (cost=0.00..21.00 rows=1000 width=4)
(7 rows)

QUERY PLAN
------------------------------------------------------------------------------------
 Limit  (cost=145.75..145.77 rows=10 width=14)
   ->  Sort  (cost=145.50..148.00 rows=1000 width=14)
         Sort Key: r1.c3, r1."C 1"
         ->  Hash Join  (cost=33.50..57.14 rows=1000 width=14)
               Hash Cond: (r1."C 1" = r2."C 1")
               ->  Seq Scan on "T 1" r1  (cost=0.00..21.00 rows=1000 width=10)
               ->  Hash  (cost=21.00..21.00 rows=1000 width=4)
                     ->  Seq Scan on "T 1" r2  (cost=0.00..21.00 rows=1000 width=4)
(8 rows)

Notice that the costs get "inverted"

              master            patched
  --------------------------------------------
  no limit    106.97..109.47    139.47..141.97
  limit        96.29..96.32     145.75..145.77

so the limit looks a bit more expensive - just enough so that it seems
cheaper to transfer more rows and execute the limit locally.

IMO this looks rather suspicious (or wrong), and compute_cpu_sort_cost
may need some fixes. I wonder why it differs from the old code so much ...

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#59Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Tomas Vondra (#58)
1 attachment(s)
Re: POC: GROUP BY optimization

Hi,

after a bit more time spent on this, I found that the issue is due to
this chunk of code in compute_cpu_sort_cost:

    if (heapSort)
    {
        if (tuplesPerPrevGroup < output_tuples)
            /* comparing only inside output_tuples */
            correctedNGroups =
                ceil(2.0 * output_tuples /
                     (tuplesPerPrevGroup / nGroups));
        else
            /* two groups - in output and out */
            correctedNGroups = 2.0;
    }
    else
        correctedNGroups = nGroups;

    if (correctedNGroups <= 1.0)
        correctedNGroups = 2.0;
    else
        correctedNGroups = ceil(correctedNGroups);
    per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);

There are a couple of issues, mostly due to differences in how cases
with different heapSort flags are handled. For a full sort (no LIMIT
clause) we have heapSort=false, and hence the execution simply jumps to

correctedNGroups = nGroups;

while for LIMIT we do heapSort=true, in which case we also start with

tuplesPerPrevGroup = ntuples;

That is almost certainly greater than output_tuples (=limit+offset), so
the first if condition in calculating correctedNGroups can't possibly be
true, and we simply end with

correctedNGroups = 2.0;

in the first loop. Which seems pretty bogus - why would there be just
two groups? When processing the first expression, it's as if there was
one big "prev group" with all the tuples, so why not just use nGroups
as it is?

This seems to confuse the costing quite a bit - enough to produce the
"inversed" costs with/without LIMIT, and picking the other plan.

I've simplified the costing a bit, and the attached version actually
undoes all the "suspicious" plan changes in postgres_fdw. It changes one
new plan, but that seems somewhat reasonable, as it pushes sort to the
remote side.

But after looking at the costing function, I have a bunch of additional
comments and questions:

1) I looked at the resources mentioned as the sources of the formulas,
but I've been unable to really match the algorithm to them. The
quicksort paper is particularly "dense", the notation seems to be very
different, and none of the theorems seem like an obvious fit. Would be
good to make the relationship clearer in comments etc.

For the Sedgewick course it's even worse - it's way too extensive to
just point at it and say "We're using ideas from this!" because no one
is going to know which chapter/section to look at. We need to be a bit
more specific about the reference.

2) I'm a bit puzzled by the "heapsort" chunk, actually. How come we need
it now, when we didn't need that before? In a way, the difference in
behavior between heapsort and non-heapsort is what triggered the plan
changes ...

FWIW It's quite possible I tweaked the costing incorrectly, but it ends
up choosing the right plans purely by accident.

3) I'm getting a bit skeptical about the various magic coefficients that
are meant to model higher costs with non-uniform distribution. But
consider that we do this, for example:

tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);

but then in the next loop we call estimate_num_groups_incremental and
pass this "tweaked" tuplesPerPrevGroup value to it. I'm pretty sure this
may have various strange consequences - we'll calculate the nGroups
based on the inflated value, and we'll calculate tuplesPerPrevGroup from
that again - that seems susceptible to "amplification".

We inflate tuplesPerPrevGroup by 50%, which means we'll get a higher
nGroups estimate in the next loop - but not linearly. And then we'll
calculate the inflated tuplesPerPrevGroup and estimated nGroups again ...

That seems pretty dubious, with hard to explain behavior, IMO.

If we want to keep applying these coefficients, we need to do that in a
way that does not affect the subsequent loop like this - we might tweak
the per_tuple_cost formula, for example, not tuplesPerPrevGroup.

4) I'm not sure it's actually a good idea to pass tuplesPerPrevGroup to
estimate_num_groups_incremental. In principle yes, if we use "group
size" from the previous step, then the returned value is the number of
new groups after adding the "new" pathkey.

But even if we ignore the issues with amplification mentioned in (3),
there's an issue with non-linear behavior in estimate_num_groups,
because at some point it's calculating

D(N,n,p) = n * (1 - ((N-p)/N)^(N/n))

where N - total rows, p - sample size, n - number of distinct values.
And if we have (N1,n1) and (N2,n2) then the ratio of the calculated
estimates (which is pretty much what calculating group size does) is

D(N2,n2,p2) / D(N1,n1,p1)

which will differ depending on p1 and p2. And if we're tweaking the
tuplesPerPrevGroup all the time, that's really annoying, as it may make
the groups smaller or larger, which is unpredictable, and I wonder if
it might go against the idea of penalizing tuplesPerPrevGroup to some
extent.

We could simply use the input "tuples" value here, and then divide the
current and previous estimate to calculate the number of new groups.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

group-by-reorder-20210722.patchtext/x-patch; charset=UTF-8; name=group-by-reorder-20210722.patchDownload
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ed25e7a743..341bb7195e 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2644,16 +2644,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b54cf34a8e..4db5441db4 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1754,6 +1754,325 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0 that would cause a call to estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_group fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea is that comparing longer values is more expensive. The
+ * return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, so fall back to a type-based estimate */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in Postgres is passed as a Datum, so no operation on a value
+	 * can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of comparison doesn't seem directly proportional to the argument
+	 * width: the arguments may differ in width (we only know the average over
+	 * the column), and the result can often be determined from the first few
+	 * bytes. So use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which are data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from two sources:
+ *
+ * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/]
+ *
+ * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild,
+ * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * tuples with key Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * In our case all Xi are assumed equal, because we have no estimate of the
+ * group sizes - only an estimate of the number of groups (distinct values).
+ * The formula then becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that the number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the per-comparison cost for column k; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider the column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys.  In this case, it falls back to a
+ * simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume that equivalence members aren't very different, so to
+		 * estimate the cost we take just the first member
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take the actual width into account */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* remember if we have a fake var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Don't call estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the previous columns
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Incrementally compute the number of groups, starting from the
+			 * estimate of the previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use a geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT because it applies to only one
+			 * column, while here we try to estimate the number of groups
+			 * over a set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) *
+				list_length(pathkeyExprs) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't contribute to the comparison cost, although
+		 * the qsort comparator still checks them.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				double heap_tuples;
+
+				/* have to keep at least one group, and a multiple of group size */
+				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
+								  tuplesPerPrevGroup);
+
+				/* so how many (whole) groups is that? */
+				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but we currently have no
+		 * way to determine that, so add a multiplier to get closer to the
+		 * worst case.
+		 */
+		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+
+		/*
+		 * We can skip all remaining columns for cost estimation, because we
+		 * believe the tuples are already unique on the set of previous columns
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). It matters for a low number of
+	 * tuples, approximately less than 1e4. It could be implemented as an
+	 * additional multiplier under the logarithm, but the formula involved is
+	 * more complicated, takes the number of unique tuples into account, and it
+	 * isn't clear how to combine that multiplier with groups. Estimate it as
+	 * 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * simple wrapper just to estimate best sort path
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1770,7 +2089,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1789,9 +2108,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is set by this function; any "input cost" should be added by
+ * the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1808,9 +2129,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1834,12 +2152,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1855,18 +2171,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1899,8 +2214,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1985,7 +2300,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1993,7 +2308,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2002,7 +2317,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2042,7 +2357,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2140,7 +2455,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2208,7 +2523,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d..899da5b109 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That is possible if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index bd9a176d7d..f3fa743679 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,526 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys= NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping, so
+	 * we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
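[An illustration, not part of the patch: the prefix-matching rule above can be shown with plain ints standing in for pathkeys. The helper `matching_prefix_length` is hypothetical.]

```c
#include <assert.h>
#include <stdbool.h>

/*
 * Count how many leading path keys have a matching GROUP BY key, stopping at
 * the first mismatch - the rest of the path ordering can't help the grouping.
 * Keys are modeled as plain ints instead of PathKey pointers.
 */
static int
matching_prefix_length(const int *path_keys, int npath,
					   const int *group_keys, int ngroup)
{
	int			n;

	for (n = 0; n < npath; n++)
	{
		bool		found = false;
		int			i;

		for (i = 0; i < ngroup; i++)
		{
			if (group_keys[i] == path_keys[n])
			{
				found = true;
				break;
			}
		}

		/* abort on first path key without a matching group key */
		if (!found)
			break;
	}

	return n;
}
```

So with path ordering (a,b,c) and GROUP BY (b,a,e), the first two path keys match and the grouping can reuse the (a,b) prefix of the sort.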
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (treated as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for(i=0; i<state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+typedef struct {
+	Cost cost;
+	PathKey *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of the comparator
+ * functions (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * On success the lists are replaced with newly allocated ones and true is
+ * returned. If no reordering is possible (or needed), false is returned and
+ * the lists are left untouched.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are fewer than two unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we first try to apply a simple, cheap heuristic - we sort the pathkeys by
+	 * sort cost (as if each pathkey was sorted independently) and then check
+	 * only the four cheapest pathkeys. The remaining pathkeys are kept ordered
+	 * by cost.
+	 *
+	 * XXX This is a very simple heuristic, likely to work fine for most cases
+	 * (because the number of GROUP BY clauses tends to be lower than 4). But it
+	 * ignores how the number of distinct values in each pathkey affects the
+	 * following sorts. It may be better to use a "more expensive" pathkey first
+	 * if it has many distinct values, because it then limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is kinda the
+	 * expensive bit we're trying to not do.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() can modify list elements, so to keep the
+		 * original list unmodified we copy it to a new one, which can then be
+		 * freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate the sort cost for each individual pathkey list,
+	 * but there's room for two dynamic programming optimizations here. Firstly,
+	 * we may pass the current "best" cost to cost_sort_estimate so that it can
+	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could
+	 * return information about the position where it exceeded the cost, and we
+	 * could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found an ordering with cost C1, and we're
+	 * evaluating another ordering - cost_sort_estimate() calculates the cost by
+	 * adding the pathkeys one by one (more or less), and the cost only grows.
+	 * If at any point it exceeds C1, it can't possibly be "better", so we can
+	 * discard it. But we also know that we can discard all orderings with the
+	 * same prefix, because if we're estimating (a,b,c,d) and we exceeded C1 at
+	 * (a,b), then the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more of a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/* reorder the tail to minimize sort cost */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time considering
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2385,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		an explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly, without reordering them.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2447,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2485,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1868c4eff4..bead51727f 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6002,24 +6002,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6031,17 +6131,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6049,14 +6149,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6066,79 +6166,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6149,100 +6176,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6445,41 +6497,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6489,6 +6571,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6547,24 +6631,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6572,9 +6732,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6583,59 +6743,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0c94cbe767..78ec5d05e6 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0c8c05f6c2..5b326364d1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding columns one by one
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* was done on previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a2e0f8de7e..28d43b14d8 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2116,6 +2116,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match pathkeys."),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys to match ORDER BY."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables looking for the cheapest order of GROUP BY columns."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f7b009ec43..e0dd3b62b5 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -267,6 +267,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index e20c245f98..a148e3fd3f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1061,6 +1061,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 2113bc82de..a3ff71fa9d 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -112,7 +112,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..47540b6c0e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 9dd444e1ff..e20f45e104 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+											  double input_rows, List **pgset,
+											  EstimationInfo *estinfo,
+											  List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 23b112b2af..f8ab04295e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1200,7 +1200,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1223,10 +1224,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2408,6 +2407,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int AS e -- same cardinality as d, but uncorrelated with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers = 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reordering columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e48..21c429226f 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f3589d0dbb..59e68ac0b7 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4612,18 +4612,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6298,44 +6300,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..a08a3825ff 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 27f7525b3e..100ded51b0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f5..63b52c065f 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1291,24 +1291,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1327,24 +1325,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ed2d6b3bdf..26fe382364 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -999,6 +999,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int AS e -- same cardinality as d, but uncorrelated with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers = 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54..1de163e039 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
#60 Andrey V. Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#59)
1 attachment(s)
Re: POC: GROUP BY optimization

I keep working on this patch. Here are intermediate results.

On 7/22/21 3:58 AM, Tomas Vondra wrote:

in the first loop. Which seems pretty bogus - why would there be just
two groups? When processing the first expression, it's as if there was
one big "prev group" with all the tuples, so why not just use nGroups
as it is?

I think the heapsort code seems very strange. Look into the fallback case:
it is based on the output_tuples value. Maybe we should use the nGroups
value here, but derived from the number of output_tuples?
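For reference, here is a back-of-the-envelope sketch (Python, illustrative constants only, not the actual planner code) of what that fallback branch charges per tuple; the 2.0 * output_tuples term is the bounded-heap case in question:

```python
import math

def fallback_per_tuple_cost(tuples, output_tuples, heap_sort,
                            cpu_operator_cost=0.0025, comparison_cost=0.0):
    # Mirrors the shape of the no-pathkeys fallback in compute_cpu_sort_cost():
    # a bounded heap keeping K tuples does ~N log2(2K) comparisons, while a
    # plain quicksort does ~N log2(N).
    n = 2.0 * output_tuples if heap_sort else tuples
    return 2.0 * cpu_operator_cost * math.log2(n) + comparison_cost

# With a LIMIT of 100 over 1e6 input rows, the bounded heap is much cheaper:
quick = fallback_per_tuple_cost(1e6, 1e6, heap_sort=False)
heap = fallback_per_tuple_cost(1e6, 100, heap_sort=True)
assert heap < quick
```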

1) I looked at the resources mentioned as sources the formulas came
from, but I've been unable to really match the algorithm to them. The
quicksort paper is particularly "dense", the notation seems to be very
different, and none of the theorems seem like an obvious fit. Would be
good to make the relationship clearer in comments etc.

Fixed (See attachment).

3) I'm getting a bit skeptical about the various magic coefficients that
are meant to model higher costs with non-uniform distribution. But
consider that we do this, for example:

tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);

but then in the next loop we call estimate_num_groups_incremental and
pass this "tweaked" tuplesPerPrevGroup value to it. I'm pretty sure this
may have various strange consequences - we'll calculate the nGroups
based on the inflated value, and we'll calculate tuplesPerPrevGroup from
that again - that seems susceptible to "amplification".

We inflate tuplesPerPrevGroup by 50%, which means we'll get a higher
nGroups estimate in the next loop - but not linearly. And then we'll
calculate the inflated tuplesPerPrevGroup and estimated nGroups ...

The weighting coefficient '1.5' reflects our desire to minimize the number
of comparison operations on each subsequent attribute of the pathkeys list.
By increasing this coefficient we increase the chance that the planner will
order pathkeys by decreasing uniqueness.
I think it's OK.
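To illustrate why ordering by decreasing number of distinct values helps, here is a toy model (Python; it only mimics the shape of compute_cpu_sort_cost() under simplified assumptions: per-column comparator cost Fk = 1, groups within a prefix approximated by min(ndistinct, group size), and the 1.5 multiplier as in the patch):

```python
import math

def sort_cpu_comparisons(n_tuples, ndistinct_per_col):
    total = 0.0
    tuples_per_prev_group = float(n_tuples)
    func_cost_sum = 0.0
    for nd in ndistinct_per_col:
        func_cost_sum += 1.0                       # Fk = 1 for every column
        # number of groups one previous group splits into, capped by its size
        n_groups = min(nd, tuples_per_prev_group)
        total += n_tuples * func_cost_sum * math.log2(max(n_groups, 2.0))
        tuples_per_prev_group = math.ceil(1.5 * tuples_per_prev_group / n_groups)
        if tuples_per_prev_group <= 1.0:
            break                                  # prefix is (almost) unique
    return total

# High-cardinality column first: later columns rarely break ties.
assert sort_cpu_comparisons(10000, [1000, 2]) < sort_cpu_comparisons(10000, [2, 1000])
```

The asymmetry comes from the accumulated per-column function cost: comparing on column k implies first comparing (and tying) on columns 1..k-1.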

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

fixes.txt (text/plain; charset=UTF-8)
From fbc5e6709550f485a2153dda97ef805700717f23 Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Date: Fri, 24 Dec 2021 13:01:48 +0500
Subject: [PATCH] My fixes.

---
 src/backend/optimizer/path/costsize.c | 33 ++++++++++++---------------
 src/backend/optimizer/path/pathkeys.c |  9 ++++----
 src/backend/optimizer/plan/planner.c  |  8 -------
 3 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index afb8ba54ea..70af9c91d5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1845,19 +1845,17 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  * groups in the current pathkey prefix and the new pathkey), and the comparison
  * costs (which is data type specific).
  *
- * Estimation of the number of comparisons is based on ideas from two sources:
+ * Estimation of the number of comparisons is based on ideas from:
  *
- * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/]
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
  *
- * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild,
- * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906]
- *
- * In term of that paper, let N - number of tuples, Xi - number of tuples with
- * key Ki, then the estimate of number of comparisons is:
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimate of number of comparisons is:
  *
  *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
  *
- * In our case all Xi are the same because now we don't have any estimation of
+ * We assume all Xi are the same because now we don't have any estimation of
  * group sizes, we have only know the estimate of number of groups (distinct
  * values). In that case, formula becomes:
  *
@@ -1865,7 +1863,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  * For multi-column sorts we need to estimate the number of comparisons for
  * each individual column - for example with columns (c1, c2, ..., ck) we
- * can estimate that number of comparions on ck is roughly
+ * can estimate that number of comparisons on ck is roughly
  *
  *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
  *
@@ -1874,10 +1872,10 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  *	N * sum( Fk * log(Gk) )
  *
- * Note: We also consider column witdth, not just the comparator cost.
+ * Note: We also consider column width, not just the comparator cost.
  *
  * NOTE: some callers currently pass NIL for pathkeys because they
- * can't conveniently supply the sort keys.  In this case, it will fallback to
+ * can't conveniently supply the sort keys. In this case, it will fall back to
  * simple comparison cost estimate.
  */
 static Cost
@@ -1925,13 +1923,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	 */
 	foreach(lc, pathkeys)
 	{
-		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
 		EquivalenceMember	*em;
 		double				 nGroups,
 							 correctedNGroups;
 
 		/*
-		 * We believe than equivalence members aren't very  different, so, to
+		 * We believe that equivalence members aren't very different, so, to
 		 * estimate cost we take just first member
 		 */
 		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
@@ -1964,7 +1962,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 
 		totalFuncCost += funcCost;
 
-		/* remeber if we have a fake var in pathkeys */
+		/* Remember if we have a fake var in pathkeys */
 		has_fake_var |= is_fake_var(em->em_expr);
 		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
 
@@ -1974,7 +1972,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 */
 		if (has_fake_var == false)
 			/*
-			 * Recursively compute number of group in group from previous step
+			 * Recursively compute number of groups in a group from previous step
 			 */
 			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
 													  tuplesPerPrevGroup, NULL, NULL,
@@ -1992,8 +1990,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 			 *
 			 * XXX What's the logic of the following formula?
 			 */
-			nGroups = ceil(2.0 + sqrt(tuples) *
-				list_length(pathkeyExprs) / list_length(pathkeys));
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
 		else
 			nGroups = tuples;
 
@@ -2033,7 +2030,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 
 		/*
 		 * We could skip all following columns for cost estimation, because we
-		 * believe that tuples are unique by set ot previous columns
+		 * believe that tuples are unique by the set of previous columns
 		 */
 		if (tuplesPerPrevGroup <= 1.0)
 			break;
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 457018c2a4..f1919823da 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -544,9 +544,10 @@ PathkeyMutatorNext(PathkeyMutatorState *state)
 	return state->elemsList;
 }
 
-typedef struct {
-	Cost cost;
-	PathKey *pathkey;
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
 } PathkeySortCost;
 
 static int
@@ -793,7 +794,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 	 * more complex logic to decide the ordering.
 	 *
 	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
-	 * more a complement, because it allows benefinting from incremental sort
+	 * more a complement, because it allows benefiting from incremental sort
 	 * as much as possible.
 	 *
 	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fac5822e5b..a7e36ed76e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6158,7 +6158,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			Path	   *path_save = path;
 			Path	   *path_original = path;
 
 			List	   *pathkey_orderings = NIL;
@@ -6182,9 +6181,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				int			presorted_keys = 0;
 				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/* restore the path (we replace it in the loop) */
-				path = path_save;
-
 				is_sorted = pathkeys_count_contained_in(info->pathkeys,
 														path->pathkeys,
 														&presorted_keys);
@@ -6653,7 +6649,6 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			Path	   *path_save = path;
 
 			List	   *pathkey_orderings = NIL;
 
@@ -6676,9 +6671,6 @@ create_partial_grouping_paths(PlannerInfo *root,
 				int			presorted_keys = 0;
 				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/* restore the path (we replace it in the loop) */
-				path = path_save;
-
 				is_sorted = pathkeys_count_contained_in(info->pathkeys,
 														path->pathkeys,
 														&presorted_keys);
-- 
2.25.1

#61 Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Andrey V. Lepikhov (#60)
1 attachment(s)
Re: POC: GROUP BY optimization

On 7/22/21 3:58 AM, Tomas Vondra wrote:

4) I'm not sure it's actually a good idea to pass tuplesPerPrevGroup to
estimate_num_groups_incremental. In principle yes, if we use "group
size" from the previous step, then the returned value is the number of
new groups after adding the "new" pathkey.
But even if we ignore the issues with amplification mentioned in (3),
there's an issue with non-linear behavior in estimate_num_groups,
because at some point it's calculating

D(N,n,p) = n * (1 - ((N-p)/N)^(N/n))

where N - total rows, p - sample size, n - number of distinct values.
And if we have (N1,n1) and (N2,n2) then the ratio of the calculated
estimates (which is pretty much what calculating group size does)

D(N2,n2,p2) / D(N1,n1,p1)

which will differ depending on p1 and p2. And if we're tweaking the
tuplesPerPrevGroup all the time, that's really annoying, as it may make
the groups smaller or larger, which is unpredictable and annoying, and I
wonder if it might go against the idea of penalizing tuplesPerPrevGroup
to some extent.
We could simply use the input "tuples" value here, and then divide the
current and previous estimate to calculate the number of new groups.

tuplesPerPrevGroup is only an upper bound for the estimation. I think we
should use the result of the previous estimation as a limit for the next
one during incremental estimation. Maybe we could simply clamp the
tuplesPerPrevGroup value to guarantee that it never grows from one step to
the next? See the attached patch on top of the previous fixes.
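A toy demonstration of why the clamp matters (Python, made-up numbers): with a column that adds no new groups (nGroups = 1), the bare 1.5 multiplier makes the estimated group size grow at every step, while the clamped version is monotonically non-increasing:

```python
import math

def next_group_size(tuples_per_prev_group, n_groups, guarded):
    # One step of the tuplesPerPrevGroup update, with and without the clamp.
    inflated = math.ceil(1.5 * tuples_per_prev_group / n_groups)
    return min(tuples_per_prev_group, inflated) if guarded else inflated

# Unguarded, group size *grows*: 1000 -> 1500 -> 2250 -> 3375
g = 1000.0
for _ in range(3):
    g = next_group_size(g, 1, guarded=False)
assert g > 1000

# Guarded, group size never exceeds the previous step's value:
g = 1000.0
for _ in range(3):
    g = next_group_size(g, 1, guarded=True)
assert g <= 1000
```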

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

guard_tuplesPerPrevGroup.txt (text/plain; charset=UTF-8)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 70af9c91d5..4e26cd275d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2025,8 +2025,10 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		/*
 		 * Real-world distribution isn't uniform but now we don't have a way to
 		 * determine that, so, add multiplier to get closer to worst case.
+		 * Guard this value against irrational decisions.
 		 */
-		tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups);
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
 
 		/*
 		 * We could skip all following columns for cost estimation, because we
#62 Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey Lepikhov (#61)
1 attachment(s)
Re: POC: GROUP BY optimization

On 1/21/22 12:09, Andrey Lepikhov wrote:

On 7/22/21 3:58 AM, Tomas Vondra wrote:

4) I'm not sure it's actually a good idea to pass tuplesPerPrevGroup to
estimate_num_groups_incremental. In principle yes, if we use "group
size" from the previous step, then the returned value is the number of
new groups after adding the "new" pathkey.
But even if we ignore the issues with amplification mentioned in (3),
there's an issue with non-linear behavior in estimate_num_groups,
because at some point it's calculating

    D(N,n,p) = n * (1 - ((N-p)/N)^(N/n))

where N - total rows, p - sample size, n - number of distinct values.
And if we have (N1,n1) and (N2,n2) then the ratio of the calculated
estimates (which is pretty much what calculating group size does)

    D(N2,n2,p2) / D(N1,n1,p1)

which will differ depending on p1 and p2. And if we're tweaking the
tuplesPerPrevGroup all the time, that's really annoying, as it may make
the groups smaller or larger, which is unpredictable and annoying, and I
wonder if it might go against the idea of penalizing tuplesPerPrevGroup
to some extent.
We could simply use the input "tuples" value here, and then divide the
current and previous estimate to calculate the number of new groups.

tuplesPerPrevGroup is only an upper bound for the estimation. I think we
should use the result of the previous estimation as a limit for the next
one during incremental estimation. Maybe we could simply clamp the
tuplesPerPrevGroup value to guarantee that it never grows from one step to
the next? See the attached patch on top of the previous fixes.

Yes, I think that's a reasonable defense - it makes no sense to exceed the
group size from the preceding step, which could happen with a small number
of groups or an unexpected ndistinct estimate.
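For the record, the non-linearity is easy to see numerically (Python sketch of the formula quoted above; the numbers are arbitrary):

```python
def D(N, n, p):
    # Estimated number of distinct values observed in a sample of p rows out
    # of N total, given n distinct values overall (the formula quoted above).
    return n * (1.0 - ((N - p) / N) ** (N / n))

# Doubling the "sample" changes the estimate by very different factors
# depending on where we start - so tweaking tuplesPerPrevGroup between
# steps shifts the ratio of successive estimates unpredictably:
r1 = D(10000, 100, 100) / D(10000, 100, 50)      # ~1.6
r2 = D(10000, 100, 2000) / D(10000, 100, 1000)   # ~1.0
assert abs(r1 - r2) > 0.1
```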

The other thing we could do is reduce the coefficient gradually - so
it'd be 1.5 for the first pathkey, then 1.25 for the next one, and so
on. But it seems somewhat arbitrary (I certainly don't have a sound
theoretical justification ...).

I've merged most of the fixes you reported. I've skipped the path_save
removal in planner.c, because that seems incorrect - if there are
multiple pathkeys, we must start with the original path, not the
modified one we built in the last iteration. Or am I missing something?
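(Schematically, the loop shape I mean is something like the following Python sketch - nothing like the real planner structures, just to show why each iteration has to start from the original path:)

```python
def add_sorted_paths(input_path, orderings):
    """For each candidate GROUP BY ordering, build a (Sort ->) Agg on top of
    the *original* input path. Without the 'restore' step, each iteration
    would stack its Sort on the modified path from the previous iteration."""
    built = []
    for pathkeys in orderings:
        path = input_path                  # restore the path each iteration
        if path["pathkeys"] != pathkeys:
            path = {"pathkeys": pathkeys, "input": input_path}
        built.append(path)
    return built

seqscan = {"pathkeys": []}
paths = add_sorted_paths(seqscan, [["a", "b"], ["b", "a"]])
# Every sorted path is built directly over the original seqscan:
assert all(p["input"] is seqscan for p in paths)
```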

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

v20220121-0001-GROUP-BY-reordering.patch (text/x-patch; charset=UTF-8)
From 590589f4388499d41d2c1bb42678b794c044d045 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Fri, 21 Jan 2022 20:22:14 +0100
Subject: [PATCH v20220121] GROUP BY reordering

---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 src/backend/optimizer/path/costsize.c         | 369 +++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 580 ++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 653 ++++++++++--------
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  37 +-
 src/backend/utils/misc/guc.c                  |  29 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  11 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 .../regress/expected/partition_aggregate.out  | 136 ++--
 src/test/regress/expected/partition_join.out  |  75 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 21 files changed, 1904 insertions(+), 494 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 7d6f7d9e3df..9926b6bba07 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8dc7dd4ca26..68a32740d7e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1754,6 +1754,325 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, which will cause the estimate_num_groups() call to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their width.
+ * The idea: longer values are more expensive to compare. The return value is
+ * in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in Postgres is passed as a Datum, so no operation on a value
+	 * can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * It seems the cost of comparison is not directly proportional to the args'
+	 * width, because the compared args may have different widths (we only know
+	 * the average over a column) and the difference can often be determined by
+	 * looking at the first bytes only. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimate of number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because now we don't have any estimation of
+ * group sizes; we only know the estimate of the number of groups (distinct
+ * values). In that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the cost of comparison for column k; then the total is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, it will fall back
+ * to a simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We believe that equivalence members aren't very different, so, to
+		 * estimate the cost we take just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take into account actual width fee */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* Remember if we have a fake var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Prevent calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the previous columns.
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Recursively compute number of groups in a group from previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT because it applies to a single
+			 * column, while here we try to estimate the number of groups
+			 * over a set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison but are still
+		 * checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				double heap_tuples;
+
+				/* have to keep at least one group, and a multiple of group size */
+				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
+								  tuplesPerPrevGroup);
+
+				/* so how many (whole) groups is that? */
+				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distribution isn't uniform but now we don't have a way to
+		 * determine that, so, add multiplier to get closer to worst case.
+		 * But ensure the number of tuples does not exceed the group size in the
+		 * preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+		/*
+		 * We could skip all following columns for cost estimation, because we
+		 * believe that tuples are unique by the set of previous columns
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). It matters for a low number of
+	 * tuples, approximately fewer than 1e4. Of course, it could be implemented
+	 * as an additional multiplier under the logarithm, but that would require a
+	 * more complicated formula taking the number of unique tuples into account,
+	 * and it isn't clear how to combine such a multiplier with groups. Estimate
+	 * it as 10 in cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * simple wrapper just to estimate best sort path
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1770,7 +2089,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1789,9 +2108,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input. If there is an "input cost" it
+ * should be added by the caller later.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1808,9 +2129,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1834,12 +2152,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1855,18 +2171,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1899,8 +2214,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1985,7 +2300,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1993,7 +2308,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2002,7 +2317,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2042,7 +2357,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2140,7 +2455,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2208,7 +2523,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8c6770de972..5487ae2ee4c 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That's possible if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef17..f18d4c8e4e7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,530 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * The function returns new lists (pathkeys and clauses); the original GROUP BY
+ * lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys= NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping,
+	 * so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of the list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for(i=0; i<state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * On success, returns true and replaces *group_pathkeys and *group_clauses
+ * with newly allocated lists. Returns false if no reordering is possible
+ * (or needed), leaving the lists untouched.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we apply a simple, cheap heuristic first - we sort the pathkeys by sort
+	 * cost (as if each pathkey were sorted independently) and then permute only
+	 * the four cheapest pathkeys. The remaining pathkeys are kept ordered by
+	 * cost.
+	 *
+	 * XXX This is a very simple heuristic, and likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than 4).
+	 * But it ignores how the number of distinct values in each pathkey affects
+	 * the following sorts. It may be better to use a "more expensive" pathkey
+	 * first if it has many distinct values, because that limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is exactly
+	 * the expensive bit we're trying to avoid.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() may free the list cells themselves, so copy
+		 * the original list into a new one that can later be freed safely,
+		 * leaving the original list unmodified.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate the sort cost for each pathkey list, but there's
+	 * room for two dynamic programming optimizations here. Firstly, we may pass
+	 * the current "best" cost to cost_sort_estimate so that it can "abort" once
+	 * the estimated pathkeys list exceeds it. Secondly, it could return
+	 * information about the position where it exceeded the cost, and we could
+	 * skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found an ordering with cost C1, and we're
+	 * evaluating another ordering - cost_sort_estimate() calculates the cost by
+	 * adding the pathkeys one by one (more or less), and the cost only grows.
+	 * If at any point it exceeds C1, it can't possibly be "better", so we can
+	 * discard it. But we also know that we can discard all orderings with the
+	 * same prefix, because if we're estimating (a,b,c,d) and we exceeded C1 at
+	 * (a,b), then the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/* reorder the tail to minimize sort cost */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/*
+		 * XXX Ignore the return value of get_cheapest_group_keys_order above -
+		 * there may be nothing to reorder, in which case it returns false. But
+		 * we still want to keep the keys reordered to match path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time considering
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2389,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering doesn't match the
+ * grouping keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2451,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2489,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd09f85aea1..fac5822e5b5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6156,24 +6156,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6185,17 +6285,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6203,14 +6303,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6220,79 +6320,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6303,100 +6330,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6599,41 +6651,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6643,6 +6725,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6701,24 +6785,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6726,9 +6886,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6737,59 +6897,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
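As an aside for reviewers: the intuition behind point 1) of the proposal, that putting high-cardinality columns first means the sort comparator rarely has to look at later columns, can be sketched outside the planner. The toy model below (hypothetical names, not code from this patch) charges each column the probability that all earlier columns compared equal, and reorders by descending ndistinct; the N^2 selection sort mirrors the patch's note that quadratic behaviour is acceptable for realistic GROUP BY lengths:

```c
#include <assert.h>

/* Toy model (hypothetical, not planner code): expected number of
 * column comparisons per tuple pair when sorting for GROUP BY.  The
 * second column is only compared when the first column ties, and so
 * on, so high-ndistinct columns belong first. */
static double
toy_sort_compare_cost(const double *ndistinct, int ncols)
{
	double		cost = 0.0;
	double		eq_prob = 1.0;	/* chance all previous columns tied */
	int			i;

	for (i = 0; i < ncols; i++)
	{
		cost += eq_prob;		/* this column is compared that often */
		eq_prob /= ndistinct[i];
	}
	return cost;
}

/* Reorder columns by descending ndistinct (simple selection sort). */
static void
toy_reorder_by_ndistinct(double *ndistinct, int ncols)
{
	int			i,
				j;

	for (i = 0; i < ncols; i++)
		for (j = i + 1; j < ncols; j++)
			if (ndistinct[j] > ndistinct[i])
			{
				double		tmp = ndistinct[i];

				ndistinct[i] = ndistinct[j];
				ndistinct[j] = tmp;
			}
}
```

Under this model, {2, 1000, 10} costs about 1.5 comparisons per pair, while the reordered {1000, 10, 2} costs about 1.001, which matches the roughly 2x speedup seen in opt_group_by_demostrator.sql.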
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5c32c96b71c..cd7c671a706 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fbb0b28c3b..36c303e45c5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for
+ *		  the case of adding columns one at a time
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* was done on previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
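To illustrate why estimate_num_groups_incremental() threads a varinfos cache through repeated calls: when the caller grows the GROUP BY prefix one column per call, re-deriving statistics for the already-seen columns makes the whole loop O(N^2) in work; caching the state for the first prevNExprs columns keeps each call proportional to the new tail only. A standalone sketch of that caching shape (hypothetical names, and with the "statistic" reduced to a clamped running product of per-column ndistinct):

```c
#include <assert.h>

/* Hypothetical cache carried across calls, playing the role of the
 * cache_varinfos/prevNExprs pair in the patch. */
typedef struct GroupEstCache
{
	double		ndistinct_product;	/* combined estimate so far */
	int			ncols_done;			/* columns already folded in */
} GroupEstCache;

/* Fold in only the columns not processed by a previous call, then
 * clamp the group estimate by the input row count. */
static double
toy_estimate_groups_incremental(const double *ndistinct, int ncols,
								double nrows, GroupEstCache *cache)
{
	int			i;

	for (i = cache->ncols_done; i < ncols; i++)
		cache->ndistinct_product *= ndistinct[i];
	cache->ncols_done = ncols;

	return (cache->ndistinct_product < nrows) ?
		cache->ndistinct_product : nrows;
}
```

Each call starts where the previous one stopped, which is exactly the contract the `j++ < prevNExprs` skip in the real function implements (including the `i++` bump to stay in sync with the grouping-set bookkeeping).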
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4c94f09c645..8719f67c9d0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2132,6 +2132,35 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable reordering of GROUP BY columns to match pathkeys"),
+			NULL
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable matching GROUP BY order to the ORDER BY clause."),
+			NULL
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("find a cheapest order of columns in GROUP BY."),
+			NULL
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f9ddafd3459..543b7f340dc 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -266,6 +266,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 1f3845b3fec..83582301908 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1070,6 +1070,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 356a51f370a..0b083a27b3c 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -112,7 +112,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0c3a0b90c85..525b594cbd0 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 8f3d73edfb2..c313a08d541 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index be5fa5727d5..34cf3d900c1 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1200,7 +1200,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1223,10 +1224,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2438,6 +2437,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e482..21c429226f7 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d5b5b775fdd..01d81292f5c 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6336,44 +6338,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b52..a08a3825ff6 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index bb5b7c47a45..03926a84138 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index dece7310cfe..7ac4a9380e2 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 7cf86465e94..a97d4058140 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1010,6 +1010,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54d..1de163e0395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.31.1

#63Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#62)
Re: POC: GROUP BY optimization

On 22/1/2022 01:34, Tomas Vondra wrote:

The other thing we could do is reduce the coefficient gradually - so
it'd be 1.5 for the first pathkey, then 1.25 for the next one, and so
on. But it seems somewhat arbitrary (I certainly don't have some sound
theoretical justification ...).

I think there is no reason to increase complexity without any theory
behind it. A simple solution lets us spot problems much faster, if they
arise.

... I've skipped the path_save
removal in planner.c, because that seems incorrect - if there are
multiple pathkeys, we must start with the original path, not the
modified one we built in the last iteration. Or am I missing something

You are right, I misunderstood the idea of the path_save variable.

--
regards,
Andrey Lepikhov
Postgres Professional

#64Andrey V. Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Andrey V. Lepikhov (#60)
1 attachment(s)
Re: POC: GROUP BY optimization

On 7/22/21 3:58 AM, Tomas Vondra wrote:
I've simplified the costing a bit, and the attached version actually
undoes all the "suspicious" plan changes in postgres_fdw. It changes one
new plan, but that seems somewhat reasonable, as it pushes sort to the
remote side.

I tried to justify the heap-sort part of the compute_cpu_sort_cost()
routine and realized that we may have a mistake here. After a week of
effort, I haven't found any research papers on the dependence of bounded
heap-sort time complexity on the number of duplicates.

So I propose a self-made formula, based on simple logical constructions:

1. We start from the initial formula: cost ~ N*LOG2(M), where M is
output_tuples.
2. Note that the full form of this formula is:

cost ~ N*LOG2(min{N,M})

3. In the multicolumn case, the number of comparisons for each subsequent
column can be estimated by the same formula, but applied to the number of
tuples per group:

comparisons ~ input * LOG2(min{input,M})

4. Note that when M > input, we should adjust this formula a bit:

comparisons ~ max{input,M} * LOG2(min{input,M})

Remember that in our case M << tuples.
So the general formula for bounded heap sort can be written as:

cost ~ N * sum(max{n_i,M}/N * LOG2(min{n_i,M})), i=1,ncols

where n_1 == N and n_i is the number of tuples per group, estimated from
the previous iteration.

An implementation of this approach is attached.
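The derivation above can be sketched numerically. Below is a minimal
illustration of the proposed comparison-count formula; the function name
and the example numbers are mine, not taken from the patch:

```python
from math import log2

def bounded_heapsort_comparisons(output_tuples, group_sizes):
    """Estimate comparisons for a bounded (top-M) heap sort over many
    columns:  sum_i max(n_i, M) * log2(min(n_i, M)),
    where M = output_tuples and n_i is the estimated number of tuples
    per group after sorting on the first i columns (n_1 == N)."""
    M = output_tuples
    return sum(max(n_i, M) * log2(min(n_i, M)) for n_i in group_sizes)

# With a single column the formula reduces to N * LOG2(min{N,M}),
# matching step 2 of the derivation (N = 10000 input tuples, M = 100):
print(bounded_heapsort_comparisons(100, [10000]))
```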

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

bounded_heap_sort_fix.txt (text/plain)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 68a32740d7..2c3cce57aa 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1872,6 +1872,15 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  *	N * sum( Fk * log(Gk) )
  *
+ * For bounded heap sort we have no such duplicates-related research, so we
+ * invent a formula based on simple logic (M is the number of output tuples):
+ * For one column we can estimate the number of comparisons as:
+ * N * log(min{N,M})
+ * For the multicolumn case we can naively estimate the number of comparisons
+ * by the formula:
+ * sum(max{n_i,M} * log(min{n_i,M})),
+ * where n_0 == N and n_i is the number of tuples per group, estimated on the previous step.
+ *
  * Note: We also consider column width, not just the comparator cost.
  *
  * NOTE: some callers currently pass NIL for pathkeys because they
@@ -1881,7 +1890,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
 static Cost
 compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 					  Cost comparison_cost, double tuples, double output_tuples,
-					  bool heapSort)
+					  bool bounded_sort)
 {
 	Cost		per_tuple_cost = 0.0;
 	ListCell	*lc;
@@ -1907,7 +1916,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 * of this function, but I'm not sure. I suggest we introduce some simple
 		 * constants for that, instead of magic values.
 		 */
-		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		output_tuples = (bounded_sort) ? 2.0 * output_tuples : tuples;
 		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
 
 		/* add cost provided by caller */
@@ -1925,8 +1934,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	{
 		PathKey				*pathkey = (PathKey*) lfirst(lc);
 		EquivalenceMember	*em;
-		double				 nGroups,
-							 correctedNGroups;
+		double				 nGroups;
 
 		/*
 		 * We believe that equivalence members aren't very different, so, to
@@ -2000,24 +2008,16 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 */
 		if (i >= nPresortedKeys)
 		{
-			if (heapSort)
-			{
-				double heap_tuples;
-
-				/* have to keep at least one group, and a multiple of group size */
-				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
-								  tuplesPerPrevGroup);
-
-				/* so how many (whole) groups is that? */
-				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
-			}
+			/*
+			 * Quick sort and 'top-N' sorting (bounded heap sort) algorithms
+			 * have different formulas for time complexity estimation.
+			 */
+			if (bounded_sort)
+				per_tuple_cost += totalFuncCost *
+								  (Max(tuplesPerPrevGroup, output_tuples) / tuples) *
+								  LOG2(2.0 * Min(tuplesPerPrevGroup, output_tuples));
 			else
-				/* all groups in the input */
-				correctedNGroups = nGroups;
-
-			correctedNGroups = Max(1.0, ceil(correctedNGroups));
-
-			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+				per_tuple_cost += totalFuncCost * LOG2(nGroups);
 		}
 
 		i++;
#65Andrey V. Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#62)
2 attachment(s)
Re: POC: GROUP BY optimization

On 1/22/22 01:34, Tomas Vondra wrote:

I rebased (with minor fixes) this patch onto current master.

Also, the second patch addresses the problem of "varno 0" (fake_var).
I think this case should produce the same estimates as the case of
varno != 0 with no stats found. So I propose to restrict the number of
groups to the minimum of the number of incoming tuples and the
DEFAULT_NUM_DISTINCT value.

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

0002-Use-default-restriction-for-number-of-groups.txt (text/plain)
From d5fd0f8f981d9e457320c1007f21f2b9b74aab9e Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Date: Thu, 10 Feb 2022 13:51:51 +0500
Subject: [PATCH 2/2] Use default restriction for number of groups.

---
 src/backend/optimizer/path/costsize.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 68a32740d7..b9e975df10 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1756,8 +1756,8 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 
 /*
  * is_fake_var
- *		Workaround for generate_append_tlist() which generates fake Vars with
- *		varno == 0, that will cause a fail of estimate_num_group() call
+ *		Workaround for generate_append_tlist(), which generates fake Vars for
+ *		the case of "varno 0" that will cause estimate_num_group() calls to fail
  *
  * XXX Ummm, why would estimate_num_group fail with this?
  */
@@ -1978,21 +1978,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 													  tuplesPerPrevGroup, NULL, NULL,
 													  &cache_varinfos,
 													  list_length(pathkeyExprs) - 1);
-		else if (tuples > 4.0)
+		else
 			/*
-			 * Use geometric mean as estimation if there is no any stats.
-			 * Don't use DEFAULT_NUM_DISTINCT because it used for only one
-			 * column while here we try to estimate number of groups over
-			 * set of columns.
-			 *
-			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
-			 * limit the calculated values, somehow?
-			 *
-			 * XXX What's the logic of the following formula?
+			 * In case of full uncertainty, use the default defensive approach:
+			 * assume that any permutation of such vars is equivalent.
+			 * Also, see the comments for the cost_incremental_sort routine.
 			 */
-			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
-		else
-			nGroups = tuples;
+			nGroups = Min(tuplesPerPrevGroup, DEFAULT_NUM_DISTINCT);
 
 		/*
 		 * Presorted keys aren't participated in comparison but still checked
-- 
2.25.1

v20220210-0001-GROUP-BY-reordering.patch (text/x-patch)
From 7357142a0f45313aa09014c4413c011b61aafe5f Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Fri, 21 Jan 2022 20:22:14 +0100
Subject: [PATCH 1/2] GROUP BY reordering

---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 src/backend/optimizer/path/costsize.c         | 369 +++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 580 ++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 653 ++++++++++--------
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  37 +-
 src/backend/utils/misc/guc.c                  |  32 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  11 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 src/test/regress/expected/guc.out             |   9 +-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 .../regress/expected/partition_aggregate.out  | 136 ++--
 src/test/regress/expected/partition_join.out  |  75 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 22 files changed, 1913 insertions(+), 497 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index b2e02caefe..2331d13858 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8dc7dd4ca2..68a32740d7 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1754,6 +1754,325 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, which would cause estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea: longer values are more expensive to compare. The return
+ * value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stats in the corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Every value in Postgres is passed around as a Datum, so no operation on
+	 * a value can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The comparison cost is likely not directly proportional to the argument
+	 * width: the actual arguments may have different widths (we only know the
+	 * column average), and the outcome is often decided by the first few
+	 * bytes. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very
+ * different numbers of distinct values - the higher the number, the fewer
+ * comparisons will be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because we don't have any estimate of the
+ * group sizes, only of the number of groups (distinct values). In that case,
+ * the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparator cost for column k; then the total is:
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, we fall back to a
+ * simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * The total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume the equivalence members don't differ much cost-wise, so
+		 * we just take the first member for the estimate.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take the actual width cost into account */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* Remember if we have a fake var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Avoid calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs now ends with the current column, preceded by all
+		 * the previous ones.
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Recursively compute number of groups in a group from previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 * Don't use DEFAULT_NUM_DISTINCT, because it applies to a single
+			 * column while here we try to estimate the number of groups over
+			 * a set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys don't participate in the comparison, but are still
+		 * checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				double heap_tuples;
+
+				/* have to keep at least one group, and a multiple of group size */
+				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
+								  tuplesPerPrevGroup);
+
+				/* so how many (whole) groups is that? */
+				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but we have no way to
+		 * determine that yet, so add a multiplier to get closer to the worst
+		 * case. But ensure the group size does not exceed the group size from
+		 * the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+		/*
+		 * We can skip all the following columns in the cost estimate, because
+		 * we believe the tuples are already unique on the set of previous
+		 * columns.
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). It matters for low numbers of
+	 * tuples, approximately below 1e4. It could be implemented as an extra
+	 * multiplier under the logarithm, but the full formula also involves the
+	 * number of unique tuples and it isn't clear how to combine that
+	 * multiplier with groups. Estimate it as 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper for estimating the sort cost of a pathkey ordering, used to
+ * pick the cheapest one.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1770,7 +2089,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1789,9 +2108,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input. Any "input cost" should be
+ * added by the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1808,9 +2129,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1834,12 +2152,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1855,18 +2171,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1899,8 +2214,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1985,7 +2300,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1993,7 +2308,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2002,7 +2317,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2042,7 +2357,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2140,7 +2455,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2208,7 +2523,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8c6770de97..5487ae2ee4 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That's possible if the
+				 * EC was constructed from a WHERE clause, i.e. it doesn't have
+				 * a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef1..f18d4c8e4e 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,530 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys= NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * Walk the pathkeys (determining the ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping, so
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for(i=0; i<state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * On success, returns true and replaces the lists with newly allocated ones.
+ * If no reordering is possible (or needed), returns false and leaves the
+ * lists untouched.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are less than 2 unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but for
+	 * a large number of pathkeys that might be prohibitively expensive. So we
+	 * first apply a simple cheap heuristic - we sort the pathkeys by sort cost
+	 * (as if each pathkey was sorted independently) and then permute only the
+	 * four cheapest pathkeys. The remaining pathkeys are kept ordered by cost.
+	 *
+	 * XXX This is a very simple heuristic, and likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be below 4). But
+	 * it ignores how the number of distinct values in each pathkey affects the
+	 * following sorts. It may be better to use "more expensive" pathkey first
+	 * if it has many distinct values, because it then limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is kinda the
+	 * expensive bit we're trying to not do.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() may free the list cells themselves, so to
+		 * keep the original list unmodified we copy it into a new one, which
+		 * can then be freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly, we
+	 * may pass the current "best" cost to cost_sort_estimate so that it can
+	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could
+	 * return information about the position where it exceeded the cost, and we
+	 * could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found an ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates the cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better", so we can discard it.
+	 * But we also know that we can discard all orderings with the same prefix,
+	 * because if we're estimating (a,b,c,d) and we exceeded C1 at (a,b) then
+	 * the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in matching
+ * order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the path ordering (as much as possible),
+ *   with the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the target ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2389,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly, but could be made to match by reordering the keys.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2451,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2489,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd09f85aea..fac5822e5b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6156,24 +6156,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6185,17 +6285,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6203,14 +6303,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6220,79 +6320,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6303,100 +6330,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6599,41 +6651,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6643,6 +6725,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6701,24 +6785,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6726,9 +6886,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6737,59 +6897,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5c32c96b71..cd7c671a70 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fbb0b28c3..36c303e45c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding grouping columns one by one
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* already processed on the previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* keep in sync with the grouping-set check below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f505413a7f..bba05a1595 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2132,6 +2132,38 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables reordering of GROUP BY keys to match existing pathkeys."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables matching of the GROUP BY order to the ORDER BY clause."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables searching for the cheapest order of GROUP BY columns."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index da35f2c272..90a0c4fccb 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -266,6 +266,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 1f3845b3fe..8358230190 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1070,6 +1070,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * PathKeyInfo combines a list of pathkeys with the matching list of
+ * SortGroupClauses, kept in the same order.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 356a51f370..0b083a27b3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -112,7 +112,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0c3a0b90c8..525b594cbd 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 8f3d73edfb..c313a08d54 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0a23a39aa2..601047fa3d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1210,7 +1210,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1233,10 +1234,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 75b6bfbf11..2137c0d8a2 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -847,10 +847,13 @@ SELECT name FROM tab_settings_flags
 SELECT name FROM tab_settings_flags
   WHERE category ~ '^Query Tuning' AND NOT explain
   ORDER BY 1;
-           name            
----------------------------
+                 name                 
+--------------------------------------
+ debug_enable_cheapest_group_by
+ debug_enable_group_by_match_order_by
+ debug_group_by_reorder_by_pathkeys
  default_statistics_target
-(1 row)
+(4 rows)
 
 -- Runtime-computed GUCs should be part of the preset category.
 SELECT name FROM tab_settings_flags
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e48..21c429226f 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 19caebabd0..bf1a2db2cf 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6336,44 +6338,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..a08a3825ff 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index bb5b7c47a4..03926a8413 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index dece7310cf..7ac4a9380e 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 2f5d0e00f3..c6e0d7ba2b 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54..1de163e039 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.25.1

#66Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey V. Lepikhov (#65)
3 attachment(s)
Re: POC: GROUP BY optimization

On 2/10/22 10:00, Andrey V. Lepikhov wrote:

On 1/22/22 01:34, Tomas Vondra wrote:

I rebased (with minor fixes) this patch onto current master.

Also, the second patch is dedicated to the problem of "varno 0" (fake_var).
I think this case should produce the same estimates as the case of
varno != 0 where no stats are found. So I propose to restrict the number
of groups to the minimum of the number of incoming tuples and the
DEFAULT_NUM_DISTINCT value.

Thanks for the rebase. The two proposed changes (tweaked costing and
simplified fake_var handling) seem fine to me. I think the last thing
that needs to be done is cleanup of the debug GUCs, which I added to
allow easier experimentation with the patch.

I probably won't remove the GUCs entirely, though. I plan to add a
single GUC that would enable/disable this optimization. I'm not a huge
fan of adding more and more GUCs, but in this case it's probably the
right thing to do given the complexity of estimating cost with
correlated columns etc.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

0002-Use-default-restriction-for-number-of-group-20220315.patchtext/x-patch; charset=UTF-8; name=0002-Use-default-restriction-for-number-of-group-20220315.patchDownload
From cfd3515a0024515546afe89fc9b2595990eaf3b2 Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Date: Thu, 10 Feb 2022 13:51:51 +0500
Subject: [PATCH 2/3] Use default restriction for number of groups.

---
 src/backend/optimizer/path/costsize.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 68a32740d7e..b9e975df10c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1756,8 +1756,8 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 
 /*
  * is_fake_var
- *		Workaround for generate_append_tlist() which generates fake Vars with
- *		varno == 0, that will cause a fail of estimate_num_group() call
+ *		Workaround for generate_append_tlist(), which generates fake Vars for
+ *		the "varno 0" case; these will cause the estimate_num_groups() call to fail
  *
  * XXX Ummm, why would estimate_num_group fail with this?
  */
@@ -1978,21 +1978,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 													  tuplesPerPrevGroup, NULL, NULL,
 													  &cache_varinfos,
 													  list_length(pathkeyExprs) - 1);
-		else if (tuples > 4.0)
+		else
 			/*
-			 * Use geometric mean as estimation if there is no any stats.
-			 * Don't use DEFAULT_NUM_DISTINCT because it used for only one
-			 * column while here we try to estimate number of groups over
-			 * set of columns.
-			 *
-			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
-			 * limit the calculated values, somehow?
-			 *
-			 * XXX What's the logic of the following formula?
+			 * In case of full uncertainty, use a default defensive approach: it
+			 * means that any permutation of such vars is treated as equivalent.
+			 * Also, see comments for the cost_incremental_sort routine.
 			 */
-			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
-		else
-			nGroups = tuples;
+			nGroups = Min(tuplesPerPrevGroup, DEFAULT_NUM_DISTINCT);
 
 		/*
 		 * Presorted keys aren't participated in comparison but still checked
-- 
2.34.1

0003-fixup-tweak-costing-20220315.patchtext/x-patch; charset=UTF-8; name=0003-fixup-tweak-costing-20220315.patchDownload
From 7432a917c30ab5e5f1db9d2569a38e59e4f37c3a Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 14 Mar 2022 23:28:55 +0100
Subject: [PATCH 3/3] fixup: tweak costing

---
 src/backend/optimizer/path/costsize.c | 42 +++++++++++++--------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b9e975df10c..7224efaba5e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1872,6 +1872,15 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  *	N * sum( Fk * log(Gk) )
  *
+ * For bounded heap sort we have no such duplicates-related research, so we
+ * invent one based on simple logic (M is the number of output tuples):
+ * For one column we can estimate the number of comparisons as:
+ * N * log(min{N,M})
+ * For the case of many columns we can naively estimate the number of
+ * comparisons by the formula:
+ * sum(max{n_i,M} * log(min{N,M})),
+ * where n_0 == N and n_i is the number of tuples per group, estimated at the previous step.
+ *
  * Note: We also consider column width, not just the comparator cost.
  *
  * NOTE: some callers currently pass NIL for pathkeys because they
@@ -1881,7 +1890,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
 static Cost
 compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 					  Cost comparison_cost, double tuples, double output_tuples,
-					  bool heapSort)
+					  bool bounded_sort)
 {
 	Cost		per_tuple_cost = 0.0;
 	ListCell	*lc;
@@ -1907,7 +1916,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 * of this function, but I'm not sure. I suggest we introduce some simple
 		 * constants for that, instead of magic values.
 		 */
-		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		output_tuples = (bounded_sort) ? 2.0 * output_tuples : tuples;
 		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
 
 		/* add cost provided by caller */
@@ -1925,8 +1934,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	{
 		PathKey				*pathkey = (PathKey*) lfirst(lc);
 		EquivalenceMember	*em;
-		double				 nGroups,
-							 correctedNGroups;
+		double				 nGroups;
 
 		/*
 		 * We believe that equivalence members aren't very different, so, to
@@ -1992,24 +2000,16 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 */
 		if (i >= nPresortedKeys)
 		{
-			if (heapSort)
-			{
-				double heap_tuples;
-
-				/* have to keep at least one group, and a multiple of group size */
-				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
-								  tuplesPerPrevGroup);
-
-				/* so how many (whole) groups is that? */
-				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
-			}
+			/*
+			 * Quick sort and 'top-N' sorting (bounded heap sort) algorithms
+			 * have different formulas for time complexity estimation.
+			 */
+			if (bounded_sort)
+				per_tuple_cost += totalFuncCost *
+								  (Max(tuplesPerPrevGroup, output_tuples) / tuples) *
+								  LOG2(2.0 * Min(tuplesPerPrevGroup, output_tuples));
 			else
-				/* all groups in the input */
-				correctedNGroups = nGroups;
-
-			correctedNGroups = Max(1.0, ceil(correctedNGroups));
-
-			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+				per_tuple_cost += totalFuncCost * LOG2(nGroups);
 		}
 
 		i++;
-- 
2.34.1

0001-Optimize-order-of-GROUP-BY-keys-20220315.patchtext/x-patch; charset=UTF-8; name=0001-Optimize-order-of-GROUP-BY-keys-20220315.patchDownload
From dca50a28b0be21decde3628302a2f8546875e884 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Fri, 21 Jan 2022 20:22:14 +0100
Subject: [PATCH 1/3] Optimize order of GROUP BY keys

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may be heavily dependent on the order in which the keys are
compared when building the groups. Grouping does not imply any ordering,
so we're allowed to compare the keys in arbitrary order, and a Hash Agg
leverages this. But for Group Agg, we simply compared keys in the order
specified in the query. This commit explores alternative orderings of
the keys, trying to find a cheaper one.

To pick the optimal order of comparisons, multiple parameters need to be
considered, both local and global ones. Intuitively, it's better to
leave more expensive comparisons (for complex data types etc.) at the
very end, because maybe one of the earlier comparisons will be enough to
make a decision, and the expensive comparison will be unnecessary. The
other local parameter is cardinality - the higher the cardinality of a
key, the lower the probability we'll need to compare additional keys. We
explore possible orderings by a combination of exhaustive and greedy
approaches, and pick the cheapest ordering.

But there are global parameters too, in the sense that we can't fully
evaluate them at the point when creating the sort. For example, there
might be an ORDER BY clause, or another ordering-dependent operation,
higher up in the query plan. The path may already be sorted by (some
of) the keys, in which case we can either use incremental sort or even
skip the sort entirely. So we consider various additional pathkeys in
addition to the "cheapest comparison" order.

The original patch was proposed by Teodor Sigaev, and later reworked and
improved by Dmitry Dolgov. Reviews by a number of people, including me,
Andrey Lepikhov, Claudio Freire and Ibrar Ahmed.

Author: Dmitry Dolgov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed
Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 src/backend/optimizer/path/costsize.c         | 369 +++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 580 ++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 653 ++++++++++--------
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  37 +-
 src/backend/utils/misc/guc.c                  |  32 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  11 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 src/test/regress/expected/guc.out             |   9 +-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 .../regress/expected/partition_aggregate.out  | 136 ++--
 src/test/regress/expected/partition_join.out  |  75 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 22 files changed, 1913 insertions(+), 497 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index f210f911880..2ffc836824a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8dc7dd4ca26..68a32740d7e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1754,6 +1754,325 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, that will cause a fail of estimate_num_group() call
+ *
+ * XXX Ummm, why would estimate_num_group fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their width.
+ * The idea is that longer values are more expensive to compare. The return value
+ * is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in pgsql is passed by Datum type, so any operation with value
+	 * could not be cheaper than operation with Datum type
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * It seems the cost of comparison is not directly proportional to the args'
+	 * width, because the compared args may differ in width (we know only the
+	 * average over the column) and the difference can often be detected by
+	 * looking at the first bytes only. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because we currently have no estimate of
+ * group sizes; we only know the estimated number of groups (distinct
+ * values). In that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that the number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by k columns, and
+ * Fk the comparison cost; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, it will fall back to
+ * a simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort.  Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce some simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We believe that equivalence members aren't very different, so, to
+		 * estimate the cost, we take just the first member
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if data type is the same as previous */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Try to take into account actual width fee */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* Remember if we have a fake var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Prevent calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the previous columns
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Recursively compute number of groups in a group from previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use geometric mean as estimation if there is no any stats.
+			 * Don't use DEFAULT_NUM_DISTINCT because it used for only one
+			 * column while here we try to estimate number of groups over
+			 * set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys aren't participated in comparison but still checked
+		 * by qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				double heap_tuples;
+
+				/* have to keep at least one group, and a multiple of group size */
+				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
+								  tuplesPerPrevGroup);
+
+				/* so how many (whole) groups is that? */
+				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions aren't uniform, but we currently have no way
+		 * to determine that, so add a multiplier to get closer to the worst case.
+		 * But ensure the number of tuples does not exceed the group size in the
+		 * preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+		/*
+		 * We can skip all remaining columns in the cost estimation, because we
+		 * believe the tuples are already unique on the set of previous columns
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort estimation
+	 * formula has an additional term proportional to the number of tuples (see
+	 * Chapter 8.2 and Theorem 4.1). It matters for low numbers of tuples,
+	 * approximately less than 1e4. Of course, it could be implemented as an
+	 * additional multiplier under the logarithm, but that would need a more
+	 * complicated formula taking into account the number of unique tuples, and
+	 * it isn't clear how to combine that multiplier with groups. Estimate it as
+	 * 10 in cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * simple wrapper just to estimate best sort path
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1770,7 +2089,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1789,9 +2108,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input. If there is "input cost", it
+ * should be added by the caller later.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1808,9 +2129,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1834,12 +2152,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1855,18 +2171,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1899,8 +2214,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1985,7 +2300,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1993,7 +2308,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2002,7 +2317,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2042,7 +2357,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2140,7 +2455,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2208,7 +2523,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8c6770de972..5487ae2ee4c 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy sortref if it wasn't set yet, it's possible if ec was
+				 * constructed from WHERE clause, ie it doesn't have target
+				 * reference at all
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef17..f18d4c8e4e7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,530 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping, so
+	 * we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
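
To make the prefix-matching behaviour above concrete, here is a toy sketch on plain int key ids (`reorder_by_prefix` is a hypothetical helper, not part of the patch): keys matching the input ordering are moved to the front, in path order, and the scan stops at the first input-ordering key with no matching group key:

```c
#include <assert.h>

/*
 * Toy analogue of group_keys_reorder_by_pathkeys on int key ids:
 * reorder group[] so that keys matching a prefix of path[] come first
 * (in path order), leaving the remaining keys after them.
 * Returns the length of the matched prefix.
 */
static int
reorder_by_prefix(const int *path, int npath, int *group, int ngroup)
{
	int			n = 0;
	int			i;

	for (i = 0; i < npath && n < ngroup; i++)
	{
		int			j;
		int			tmp;
		int			k;

		/* does path[i] appear among the remaining group keys? */
		for (j = n; j < ngroup; j++)
			if (group[j] == path[i])
				break;
		if (j == ngroup)
			break;				/* first mismatch: stop */

		/* move the match to position n, shifting the others right */
		tmp = group[j];
		for (k = j; k > n; k--)
			group[k] = group[k - 1];
		group[n++] = tmp;
	}
	return n;
}
```

With path ordering `(20, 30, 10)` and group keys `(30, 20, 40)`, the sketch matches the prefix `(20, 30)` and leaves `40` at the tail, mirroring how the patch treats unmatched group keys as not sorted.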
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState
+{
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+	int			s = a[i];
+
+	a[i] = a[j];
+	a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
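
As an aside, `PathkeyMutatorNextSet` above is the classic lexicographic next-permutation algorithm applied to the positions array. A standalone sketch on a plain int array (`next_permutation` and `swap_ints` are hypothetical names, not part of the patch):

```c
#include <assert.h>
#include <stdbool.h>

/* Swap two elements of an int array. */
static void
swap_ints(int *a, int i, int j)
{
	int			tmp = a[i];

	a[i] = a[j];
	a[j] = tmp;
}

/*
 * Advance a[0..n-1] to its next lexicographic permutation in place.
 * Returns false once a[] is the last (fully descending) permutation.
 */
static bool
next_permutation(int *a, int n)
{
	int			j = n - 2;
	int			k = n - 1;
	int			l,
				r;

	/* find the rightmost ascent a[j] < a[j+1] */
	while (j >= 0 && a[j] >= a[j + 1])
		j--;
	if (j < 0)
		return false;			/* already the last permutation */

	/* find the rightmost element greater than a[j] and swap them */
	while (a[j] >= a[k])
		k--;
	swap_ints(a, j, k);

	/* reverse the (descending) suffix so it becomes the smallest order */
	for (l = j + 1, r = n - 1; l < r; l++, r--)
		swap_ints(a, l, r);
	return true;
}
```

Starting from `{1, 2, 3}`, repeated calls yield `{1, 3, 2}`, `{2, 1, 3}`, and so on, which is exactly the sequence the mutator walks over the key positions.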
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[state->positions[i] - 1];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * On success, returns true and replaces *group_pathkeys and *group_clauses
+ * with newly allocated lists; returns false if no reordering is possible (or
+ * needed).
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we first apply a simple cheap heuristic - we sort the pathkeys by sort
+	 * cost (as if each pathkey was sorted independently) and then check only
+	 * the four cheapest pathkeys. The remaining pathkeys are kept ordered by
+	 * cost.
+	 *
+	 * XXX This is a very simple heuristic, likely to work fine for most cases
+	 * (because the number of GROUP BY clauses tends to be lower than 4). But
+	 * it ignores how the number of distinct values in each pathkey affects the
+	 * following sorts. It may be better to use a "more expensive" pathkey
+	 * first if it has many distinct values, because that limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is exactly
+	 * the expensive bit we're trying to avoid.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() may modify the list's cells in place, so we
+		 * work on a copy to keep the original list unmodified; the copy can
+		 * then be freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly, we
+	 * may pass the current "best" cost to cost_sort_estimate so that it can
+	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could
+	 * return information about the position where it exceeded the cost, and we
+	 * could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better" so we can discard it.
+	 * But we also know that we can discard all ordering with the same prefix,
+	 * because if we're estimating (a,b,c,d) and we exceeded C1 at (a,b) then
+	 * the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the rest GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
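
The trade-off flagged in the XXX comment above (per-column comparator cost vs. number of distinct values) can be illustrated with a toy model, not part of the patch: assuming uniformly distributed, independent columns, a tuple comparison only examines column i when all preceding columns compared equal. `expected_cols_compared` is a hypothetical helper:

```c
#include <assert.h>

/*
 * Toy model: expected number of columns examined per tuple comparison,
 * given the number of distinct values of each column (in sort order).
 * Assumes uniformly distributed, independent columns.
 */
static double
expected_cols_compared(const double *ndistinct, int ncols)
{
	double		total = 0.0;
	double		p_prefix_equal = 1.0;	/* P(all earlier columns equal) */
	int			i;

	for (i = 0; i < ncols; i++)
	{
		total += p_prefix_equal;	/* column i examined only in that case */
		p_prefix_equal /= ndistinct[i];
	}
	return total;
}
```

With a nearly unique column first (`{1000, 2}`) the model gives about 1.001 column comparisons per tuple comparison, versus 1.5 when the low-cardinality column leads, matching the motivation in the opening mail that a unique first column means the comparator almost never touches the following columns.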
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in
+ * matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering (hence the early return above).
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time considering
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2389,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The ordering
+ * may not be "complete" and may require incremental sort, but that's fine. So
+ * we simply count prefix pathkeys with a matching group key, and stop once we
+ * find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2451,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2489,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd09f85aea1..fac5822e5b5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6156,24 +6156,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6185,17 +6285,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6203,14 +6303,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6220,79 +6320,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6303,100 +6330,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6599,41 +6651,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6643,6 +6725,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6701,24 +6785,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6726,9 +6886,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6737,59 +6897,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5c32c96b71c..cd7c671a706 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fbb0b28c3b..36c303e45c5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization
+ *		  for the case of adding columns one at a time.
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* this expression was already processed by a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e7f0a380e60..935d5bd9f6d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2139,6 +2139,38 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable reordering of GROUP BY keys to match pathkeys."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("enable matching the GROUP BY order to the ORDER BY clause."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("find the cheapest order of columns in GROUP BY."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 5d075f0c346..34b9fbcf69e 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -266,6 +266,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 1f3845b3fec..83582301908 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1070,6 +1070,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 356a51f370a..0b083a27b3c 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -112,7 +112,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0c3a0b90c85..525b594cbd0 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 8f3d73edfb2..c313a08d541 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0a23a39aa29..601047fa3dd 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1210,7 +1210,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1233,10 +1234,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 3de6404ba5b..98a16c118f2 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -858,10 +858,13 @@ SELECT name FROM tab_settings_flags
 SELECT name FROM tab_settings_flags
   WHERE category ~ '^Query Tuning' AND NOT explain
   ORDER BY 1;
-           name            
----------------------------
+                 name                 
+--------------------------------------
+ debug_enable_cheapest_group_by
+ debug_enable_group_by_match_order_by
+ debug_group_by_reorder_by_pathkeys
  default_statistics_target
-(1 row)
+(4 rows)
 
 -- Runtime-computed GUCs should be part of the preset category.
 SELECT name FROM tab_settings_flags
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e482..21c429226f7 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 19caebabd01..bf1a2db2cf0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6336,44 +6338,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b52..a08a3825ff6 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index bb5b7c47a45..03926a84138 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index dece7310cfe..7ac4a9380e2 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 2f5d0e00f3d..c6e0d7ba2b7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54d..1de163e0395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.34.1

#67Andrey V. Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#66)
Re: POC: GROUP BY optimization

On 3/15/22 13:26, Tomas Vondra wrote:

Thanks for the rebase. The two proposed changes (tweaked costing and
simplified fake_var handling) seem fine to me. I think the last thing
that needs to be done is cleanup of the debug GUCs, which I added to
allow easier experimentation with the patch.

Thanks, I'm waiting for the last step.

I probably won't remove the GUCs entirely, though. I plan to add a
single GUC that would enable/disable this optimization. I'm not a huge
fan of adding more and more GUCs, but in this case it's probably the
right thing to do given the complexity of estimating cost with
correlated columns etc.

Agreed. Because it is a kind of automation, we should allow the user to
switch it off in case of problems or for manual tuning.

Also, I looked through this patch. It has some minor problems:
1. Multiple typos in the patch comment.
2. The term 'cardinality of a key' - maybe replace it with 'number of
duplicates'?

--
regards,
Andrey Lepikhov
Postgres Professional

#68Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey V. Lepikhov (#67)
7 attachment(s)
Re: POC: GROUP BY optimization

Hi,

Here's a rebased/improved version of the patch, with smaller parts
addressing various issues. There are seven parts:

0001 - main part, just rebased

0002 - replace the debug GUC options with a single GUC to disable the
optimization if needed

0003 - minor code cleanup, removal of unnecessary variable

0004 - various comment fixes (rewordings, typos, ...)

0005 - a minor code simplification, addressing FIXMEs from 0004

0006 - adds the new GUC to the docs

0007 - demonstrates plan changes with a disabled optimization

The first 6 parts should be squashed and committed as one; I only kept
them separate for clarity. The 0007 is merely a demonstration of the new
GUC and that it disables the optimization.

Agreed. Because it is a kind of automation, we should allow the user to
switch it off in case of problems or for manual tuning.

Also, I looked through this patch. It has some minor problems:

1. Multiple typos in the patch comment.

I went through the comments and checked all of them for grammar mistakes
and typos using a word processor, so hopefully that should be OK. But
maybe there's still something wrong.

2. The term 'cardinality of a key' - maybe replace it with 'number of
duplicates'?

No, cardinality means "number of distinct values", so "duplicates" would
be wrong. And I think "cardinality" is a well-established term, so I think
it's fine.

BTW I named the GUC enable_group_by_reordering, I wonder if it should be
named differently, e.g. enable_groupby_reordering? Opinions?
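
For illustration, a minimal usage sketch - assuming the GUC keeps the enable_group_by_reordering name mentioned above, and reusing the btg table from the regression test added in 0001:

```sql
-- Hypothetical usage; GUC name as proposed in this thread.
SET enable_group_by_reordering = off;

-- With the optimization disabled, the planner keeps the written key
-- order (v, p) even where reordering to (p, v) would be cheaper.
EXPLAIN (COSTS OFF)
SELECT count(*) FROM btg GROUP BY v, p;

RESET enable_group_by_reordering;
```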

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

0001-Optimize-order-of-GROUP-BY-keys-20220328.patch (text/x-patch)
From 61369b499770918edb929d0ca837a123bea3fcdb Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Fri, 21 Jan 2022 20:22:14 +0100
Subject: [PATCH 1/7] Optimize order of GROUP BY keys

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may be heavily dependent on the order in which the keys are
compared when building the groups. Grouping does not imply any ordering,
so we're allowed to compare the keys in arbitrary order, and a Hash Agg
leverages this. But for Group Agg, we simply compared keys in the order
specified in the query. This commit explores alternative orderings of
the keys, trying to find a cheaper one.

To pick the optimal order of comparisons, multiple parameters need to be
considered, both local and global. Intuitively, it's better to leave the
more expensive comparisons (for complex data types etc.) until the very
end, because maybe one of the earlier comparisons will be enough to make
a decision, making the expensive comparison unnecessary. The other local
parameter is cardinality - the higher the cardinality of a key, the
lower the probability we'll need to compare additional keys. We explore
possible orderings by a combination of exhaustive and greedy approaches,
and pick the cheapest ordering.

But there are global parameters too, in the sense that we can't fully
evaluate them at the point when creating the sort. For example, there
might be an ORDER BY clause, or another ordering-dependent operation,
higher up in the query plan. The path may already be sorted by (some
of) the keys, in which case we can either use incremental sort or even
skip the sort entirely. So we consider various additional pathkeys in
addition to the "cheapest comparison" order.

The original patch was proposed by Teodor Sigaev, and later reworked and
improved by Dmitry Dolgov. Reviews by a number of people, including me,
Andrey Lepikhov, Claudio Freire and Ibrar Ahmed.

Author: Dmitry Dolgov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed
Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 src/backend/optimizer/path/costsize.c         | 369 +++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 580 ++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 653 ++++++++++--------
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  37 +-
 src/backend/utils/misc/guc.c                  |  32 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |  11 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 src/test/regress/expected/guc.out             |   9 +-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 .../regress/expected/partition_aggregate.out  | 136 ++--
 src/test/regress/expected/partition_join.out  |  75 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 22 files changed, 1913 insertions(+), 497 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index f210f911880..2ffc836824a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1b07ea392d9..f4f7ce01065 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1755,6 +1755,325 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, which would cause the estimate_num_groups() call to fail
+ *
+ * XXX Ummm, why would estimate_num_group fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea is that longer values are more expensive to compare. The
+ * return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, use estimation by type */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Any value in Postgres is passed as a Datum, so no operation on a value
+	 * can be cheaper than an operation on a Datum.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * The cost of a comparison is not directly proportional to the args'
+	 * width, because the args may differ in width (we only know the average
+	 * over the column) and the difference can often be determined by looking
+	 * at just the first bytes. So, use log16(width) as the estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, the columns may have very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same, because we have no estimate of the group
+ * sizes - we only have an estimate of the number of groups (distinct values).
+ * In that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be the column number, Gk the number of groups defined by the first k
+ * columns, and Fk the cost of comparing column k. Then the weighted number of
+ * comparisons is estimated as:
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they can't
+ * conveniently supply the sort keys. In that case we fall back to the simple
+ * comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		funcCost = 0.0;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * A bounded heap-sort keeping just K tuples in memory needs a total
+		 * of N log2 K tuple comparisons; but the constant factor is a bit
+		 * higher than for quicksort.  Tweak it so that the cost curve is
+		 * continuous at the crossover point.
+		 *
+		 * XXX I suppose the "quicksort factor" refers to the 1.5 at the end
+		 * of this function, but I'm not sure. I suggest we introduce simple
+		 * constants for that, instead of magic values.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account:
+	 * - per column comparison function cost
+	 * - we try to compute needed number of comparison per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember	*em;
+		double				 nGroups,
+							 correctedNGroups;
+
+		/*
+		 * We assume the equivalence members are not very different, so to
+		 * estimate the cost we simply take the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* don't look up funcCost if the data type is the same as the previous one */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+				/* we need procost, not product of procost and cpu_operator_cost */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+				prev_datatype = em->em_datatype;
+			}
+		}
+		else
+			funcCost = 1.0; /* fallback */
+
+		/* Take the actual column width into account as a cost multiplier */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		totalFuncCost += funcCost;
+
+		/* Remember if we have a fake var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * Avoid calling estimate_num_groups() with a fake Var. Note that
+		 * pathkeyExprs contains only the columns processed so far.
+		 */
+		if (has_fake_var == false)
+			/*
+			 * Recursively compute number of groups in a group from previous step
+			 */
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate when there are no
+			 * statistics. We don't use DEFAULT_NUM_DISTINCT because it
+			 * applies to a single column, while here we estimate the number
+			 * of groups over a set of columns.
+			 *
+			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
+			 * limit the calculated values, somehow?
+			 *
+			 * XXX What's the logic of the following formula?
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys do not contribute to the comparison cost, even
+		 * though they are still checked by the qsort comparator.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				double heap_tuples;
+
+				/* have to keep at least one group, and a multiple of group size */
+				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
+								  tuplesPerPrevGroup);
+
+				/* so how many (whole) groups is that? */
+				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Real-world distributions are not uniform, but we currently have no
+		 * way to determine that, so apply a multiplier to get closer to the
+		 * worst case. But make sure the number of tuples does not exceed the
+		 * group size from the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+		/*
+		 * We can skip all the following columns in the cost estimate, because
+		 * we expect the tuples to be unique on the set of preceding columns.
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). It matters for a low number
+	 * of tuples, roughly below 1e4. It could be implemented as an extra
+	 * multiplier under the logarithm, but we use a more complicated formula
+	 * that accounts for the number of unique tuples, and it isn't clear how
+	 * to combine the multiplier with the groups. Estimate it as 10 in
+	 * cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper for estimating the sort cost of a given pathkey list.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1771,7 +2090,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1790,9 +2109,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; any "input cost" should be
+ * added by the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1809,9 +2130,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1835,12 +2153,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1856,18 +2172,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1900,8 +2215,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1986,7 +2301,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1994,7 +2309,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2003,7 +2318,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2043,7 +2358,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2141,7 +2456,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2209,7 +2524,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8c6770de972..5487ae2ee4c 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That is possible if
+				 * the ec was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef17..f18d4c8e4e7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,19 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -334,6 +337,530 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/************************<DEBUG PART>*************************************/
+bool debug_group_by_reorder_by_pathkeys = true;
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************</DEBUG PART>************************************/
+
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys= NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (debug_group_by_reorder_by_pathkeys == false)
+		return 0;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining the ordering of the input path) and see
+	 * if there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way that allows simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* XXX maybe return when (n == 0) */
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for(i=0; i<state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Returns the pathkeys in an order cheapest to evaluate.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * On success returns true and replaces *group_pathkeys and *group_clauses
+ * with newly allocated lists; returns false if no reordering is possible
+ * (or needed), leaving the lists untouched.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If this optimization is disabled, we're done. */
+	if (!debug_cheapest_group_by)
+		return false;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we apply a cheap heuristic first - we sort the pathkeys by sort cost (as
+	 * if each pathkey was sorted independently) and then permute only the four
+	 * cheapest pathkeys. The remaining pathkeys are kept ordered by cost.
+	 *
+	 * XXX This is a very simple heuristic, likely to work fine for most cases
+	 * (because the number of GROUP BY clauses tends to be lower than 4). But
+	 * it ignores how the number of distinct values in each pathkey affects the
+	 * following sorts. It may be better to use a "more expensive" pathkey
+	 * first if it has many distinct values, because that limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is kind of
+	 * the expensive bit we're trying to avoid.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/*
+		 * Since v13, list_free() may also free the list elements. To keep the
+		 * original list unmodified, copy it into a new one that can later be
+		 * freed safely if needed.
+		 */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute
+	 * pathkeys.
+	 *
+	 * XXX We simply calculate the sort cost for each pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly,
+	 * we may pass the current "best" cost to cost_sort_estimate so that it
+	 * can "abort" once the estimated cost of a pathkey list exceeds it.
+	 * Secondly, it could return information about the position where the
+	 * cost was exceeded, and we could skip all permutations with the same
+	 * prefix.
+	 *
+	 * Imagine we've already found an ordering with cost C1, and we're
+	 * evaluating another ordering - cost_sort_estimate() calculates the cost
+	 * by adding the pathkeys one by one (more or less), and the cost only
+	 * grows. If at any point it exceeds C1, the ordering can't possibly be
+	 * "better", so we can discard it. But we also know we can discard all
+	 * orderings with the same prefix, because if we're estimating (a,b,c,d)
+	 * and exceeded C1 at (a,b), the same thing happens for any ordering with
+	 * this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the path ordering (as much as possible),
+ *   with the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on incremental sort to fix this), but we ignore those for now. To make
+ * this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering (hence the early return
+	 * above).
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize the sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to match path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause, but only if debug_group_by_match_order_by is set).
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize the sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ORDER BY ordering */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2389,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require an incremental sort, but
+ * that's fine. So we simply count the prefix pathkeys with a matching group
+ * key, and stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines that
+ * the (a,b) pathkeys are useful for grouping, and we might do an incremental
+ * sort to get a path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not directly
+ * match the grouping keys, without reordering them.
+ *
+ * Returns the length of the pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2451,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2489,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 547fda20a23..bb957d50d23 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6218,24 +6218,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6247,17 +6347,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6265,14 +6365,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6282,79 +6382,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6365,100 +6392,125 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
+
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6661,41 +6713,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
+
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6705,6 +6787,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6763,24 +6847,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6788,9 +6948,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6799,59 +6959,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 99df76b6b71..1670e546440 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fbb0b28c3b..36c303e45c5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,7 +3294,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups		- Estimate number of groups in a grouped query
+ * estimate_num_groups/estimate_num_groups_incremental
+ *		- Estimate number of groups in a grouped query.
+ *		  The _incremental variant is a performance optimization for the
+ *		  case of adding columns one by one.
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3368,11 +3371,22 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i, j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3417,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3426,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* this expression was already processed by a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++; /* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3503,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3524,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index eb3a03b9762..31f347d469d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2140,6 +2140,38 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+/************************<DEBUG OPT GROUP BY>*********************************/
+	{
+		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering GROUP BY keys to match pathkeys."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_reorder_by_pathkeys,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables matching the GROUP BY order to the ORDER BY clause."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_group_by_match_order_by,
+		true,
+		NULL, NULL, NULL
+	},
+	{
+		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables searching for the cheapest order of GROUP BY columns."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&debug_cheapest_group_by,
+		true,
+		NULL, NULL, NULL
+	},
+/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index e8f30367a48..e74a7c72ea6 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -271,6 +271,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 365000bdcdb..6cbcb67bdf1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1070,6 +1070,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index bc12071af6e..dc7fc174114 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -114,7 +114,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0c3a0b90c85..525b594cbd0 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -203,6 +203,17 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+/************************<DEBUG OPT GROUP BY>*********************************/
+extern bool debug_group_by_reorder_by_pathkeys;
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/************************</DEBUG OPT GROUP BY>********************************/
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 8f3d73edfb2..c313a08d541 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0a23a39aa29..601047fa3dd 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1210,7 +1210,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1233,10 +1234,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 3de6404ba5b..98a16c118f2 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -858,10 +858,13 @@ SELECT name FROM tab_settings_flags
 SELECT name FROM tab_settings_flags
   WHERE category ~ '^Query Tuning' AND NOT explain
   ORDER BY 1;
-           name            
----------------------------
+                 name                 
+--------------------------------------
+ debug_enable_cheapest_group_by
+ debug_enable_group_by_match_order_by
+ debug_group_by_reorder_by_pathkeys
  default_statistics_target
-(1 row)
+(4 rows)
 
 -- Runtime-computed GUCs should be part of the preset category.
 SELECT name FROM tab_settings_flags
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e482..21c429226f7 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 19caebabd01..bf1a2db2cf0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6336,44 +6338,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b52..a08a3825ff6 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index bb5b7c47a45..03926a84138 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index dece7310cfe..7ac4a9380e2 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 2f5d0e00f3d..c6e0d7ba2b7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54d..1de163e0395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.34.1
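The regression tests above exercise the comparator short-circuit the cover letter describes: if the first sort column is (nearly) unique, the comparator almost never has to evaluate the following columns. A minimal Python sketch of that effect (synthetic data and cardinalities, made up for illustration; this is not the btg table):

```python
import random
from functools import cmp_to_key

random.seed(42)

# 10,000 synthetic rows: column 0 has only 10 distinct values,
# column 1 is nearly unique; purely illustrative data.
rows = [(random.randint(0, 9), random.randint(0, 10**6)) for _ in range(10000)]

def second_column_comparisons(rows, first, second):
    """Sort by (first, second), counting how often the comparator has to
    fall through to the second column because the first columns tie."""
    count = [0]

    def cmp(a, b):
        if a[first] != b[first]:
            return -1 if a[first] < b[first] else 1
        count[0] += 1  # first columns tied, second column must be compared
        if a[second] == b[second]:
            return 0
        return -1 if a[second] < b[second] else 1

    sorted(rows, key=cmp_to_key(cmp))
    return count[0]

low_card_first = second_column_comparisons(rows, 0, 1)   # few distinct values first
high_card_first = second_column_comparisons(rows, 1, 0)  # many distinct values first
print(low_card_first, high_card_first)
```

With the seeded data, putting the high-cardinality column first triggers far fewer second-column comparisons, which is the effect the reordering heuristic tries to exploit.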

Attachment: 0002-fixup-rework-GUC-options-20220328.patch (text/x-patch)
From 03e21ff5c7f6a0c4e2d43a0be647f0cd75a04786 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 28 Mar 2022 17:14:54 +0200
Subject: [PATCH 2/7] fixup: rework GUC options

Replace the three GUC debug options with a single GUC that just disables
the optimization and reverts back to the order specified in the query.
---
 src/backend/optimizer/path/pathkeys.c  | 24 ++++++---------
 src/backend/utils/misc/guc.c           | 42 ++++++--------------------
 src/include/optimizer/paths.h          |  6 +---
 src/test/regress/expected/guc.out      |  9 ++----
 src/test/regress/expected/sysviews.out |  3 +-
 5 files changed, 26 insertions(+), 58 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f18d4c8e4e7..fcc99f0fb1c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -31,6 +31,8 @@
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -337,12 +339,6 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
-/************************<DEBUG PART>*************************************/
-bool debug_group_by_reorder_by_pathkeys = true;
-bool debug_group_by_match_order_by = true;
-bool debug_cheapest_group_by = true;
-/************************</DEBUG PART>************************************/
-
 /*
  * group_keys_reorder_by_pathkeys
  *		Reorder GROUP BY keys to match pathkeys of input path.
@@ -361,9 +357,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	ListCell   *lc;
 	int			n;
 
-	if (debug_group_by_reorder_by_pathkeys == false)
-		return 0;
-
 	if (pathkeys == NIL || *group_pathkeys == NIL)
 		return 0;
 
@@ -598,10 +591,6 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 	int nFreeKeys;
 	int nToPermute;
 
-	/* If this optimization is disabled, we're done. */
-	if (!debug_cheapest_group_by)
-		return false;
-
 	/* If there are less than 2 unsorted pathkeys, we're done. */
 	if (list_length(*group_pathkeys) - n_preordered < 2)
 		return false;
@@ -771,6 +760,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 
 	infos = lappend(infos, info);
 
+	/*
+	 * If the optimization is disabled, we consider only the pathkey order as
+	 * specified in the query. We don't do any reorderings.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
 	/* for grouping sets we can't do any reordering */
 	if (parse->groupingSets)
 		return infos;
@@ -834,7 +830,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
 	 * info in this case.
 	 */
-	if (root->sort_pathkeys && debug_group_by_match_order_by)
+	if (root->sort_pathkeys)
 	{
 		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
 													  &pathkeys,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 31f347d469d..9e8ab1420d9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1182,6 +1182,16 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
@@ -2140,38 +2150,6 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
-/************************<DEBUG OPT GROUP BY>*********************************/
-	{
-		{"debug_group_by_reorder_by_pathkeys", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("enable reorder GROUP BY by pathkeys"),
-			NULL,
-			GUC_NOT_IN_SAMPLE
-		},
-		&debug_group_by_reorder_by_pathkeys,
-		true,
-		NULL, NULL, NULL
-	},
-	{
-		{"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("enable matching GROUP BY by ORDER BY."),
-			NULL,
-			GUC_NOT_IN_SAMPLE
-		},
-		&debug_group_by_match_order_by,
-		true,
-		NULL, NULL, NULL
-	},
-	{
-		{"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("find a cheapest order of columns in GROUP BY."),
-			NULL,
-			GUC_NOT_IN_SAMPLE
-		},
-		&debug_cheapest_group_by,
-		true,
-		NULL, NULL, NULL
-	},
-/************************</DEBUG OPT GROUP BY>********************************/
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 525b594cbd0..2ffa24aa899 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -206,11 +207,6 @@ extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
 extern int group_keys_reorder_by_pathkeys(List *pathkeys,
 										  List **group_pathkeys,
 										  List **group_clauses);
-/************************<DEBUG OPT GROUP BY>*********************************/
-extern bool debug_group_by_reorder_by_pathkeys;
-extern bool debug_group_by_match_order_by;
-extern bool debug_cheapest_group_by;
-/************************</DEBUG OPT GROUP BY>********************************/
 extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 											 List *path_pathkeys,
 											 List *pathkeys, List *clauses);
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 98a16c118f2..3de6404ba5b 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -858,13 +858,10 @@ SELECT name FROM tab_settings_flags
 SELECT name FROM tab_settings_flags
   WHERE category ~ '^Query Tuning' AND NOT explain
   ORDER BY 1;
-                 name                 
---------------------------------------
- debug_enable_cheapest_group_by
- debug_enable_group_by_match_order_by
- debug_group_by_reorder_by_pathkeys
+           name            
+---------------------------
  default_statistics_target
-(4 rows)
+(1 row)
 
 -- Runtime-computed GUCs should be part of the preset category.
 SELECT name FROM tab_settings_flags
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 442eeb1e3fe..72891690399 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -105,6 +105,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -122,7 +123,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(20 rows)
+(21 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
-- 
2.34.1
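The net behavior of patch 0002 is simple gating: the ordering written in the query is always considered, and alternatives only when the single GUC is on (and no grouping sets are involved). A hedged Python sketch of that control flow (function and parameter names are invented for illustration; the real logic lives in get_useful_group_keys_orderings()):

```python
def useful_group_key_orderings(query_order, enable_reordering,
                               has_grouping_sets, alternative_orderings):
    """Sketch of the gating in get_useful_group_keys_orderings() after this
    patch: the order as written in the query is always kept; alternative
    orderings are added only when enable_group_by_reordering is on and the
    query has no grouping sets."""
    orderings = [list(query_order)]
    if not enable_reordering:
        # optimization disabled: only the query's own order survives
        return orderings
    if has_grouping_sets:
        # grouping sets cannot be reordered
        return orderings
    orderings.extend(alternative_orderings)
    return orderings

print(useful_group_key_orderings(['v', 'p'], True, False, [['p', 'v']]))
```

This replaces the three debug GUCs with one on/off switch, so there is no way to enable only a subset of the reordering strategies anymore.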

Attachment: 0003-fixup-minor-code-cleanup-20220328.patch (text/x-patch)
From 3beb6e4669f47f78f6356a2ba52ebe013262bda6 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 28 Mar 2022 18:40:22 +0200
Subject: [PATCH 3/7] fixup: minor code cleanup

---
 src/backend/optimizer/plan/planner.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bb957d50d23..b2569c5d0c7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6220,7 +6220,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			Path	   *path_save = path;
 			Path	   *path_original = path;
 
 			List	   *pathkey_orderings = NIL;
@@ -6245,7 +6244,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
-				path = path_save;
+				path = path_original;
 
 				is_sorted = pathkeys_count_contained_in(info->pathkeys,
 														path->pathkeys,
@@ -6255,13 +6254,11 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				{
 					/* Sort the cheapest-total path if it isn't already sorted */
 					if (!is_sorted)
-					{
 						path = (Path *) create_sort_path(root,
 														 grouped_rel,
 														 path,
 														 info->pathkeys,
 														 -1.0);
-					}
 
 					/* Now decide what to stick atop it */
 					if (parse->groupingSets)
@@ -6432,7 +6429,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					{
 						if (path != partially_grouped_rel->cheapest_total_path)
 							continue;
-
 						path = (Path *) create_sort_path(root,
 														 grouped_rel,
 														 path,
-- 
2.34.1

Attachment: 0004-fixup-comments-cleanup-type-fixes-and-rewor-20220328.patch (text/x-patch)
From 7a3a7facd15d2c0034eeff7041df56a4e0d456f4 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 29 Mar 2022 01:12:15 +0200
Subject: [PATCH 4/7] fixup: comments cleanup, type fixes and rewording

---
 src/backend/optimizer/path/costsize.c   | 124 +++++++++++++-----------
 src/backend/optimizer/path/equivclass.c |   6 +-
 src/backend/optimizer/path/pathkeys.c   |  68 ++++++-------
 src/backend/utils/adt/selfuncs.c        |  13 ++-
 4 files changed, 105 insertions(+), 106 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f4f7ce01065..bb0fe0c22ee 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1773,9 +1773,9 @@ is_fake_var(Expr *expr)
 
 /*
  * get_width_cost_multiplier
- *		Returns relative complexity of comparing two values based on it's width.
- * The idea behind - long values have more expensive comparison. Return value is
- * in cpu_operator_cost unit.
+ *		Returns relative complexity of comparing two values based on their width.
+ * The idea is that the comparison becomes more expensive the longer the
+ * values are. Return value is in cpu_operator_cost units.
  */
 static double
 get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
@@ -1806,7 +1806,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
 		}
 	}
 
-	/* Didn't find any actual stats, use estimation by type */
+	/* Didn't find any actual stats, try using type width instead. */
 	if (width < 0.0)
 	{
 		Node	*node = (Node*) expr;
@@ -1815,17 +1815,20 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
 	}
 
 	/*
-	 * Any value in pgsql is passed by Datum type, so any operation with value
-	 * could not be cheaper than operation with Datum type
+	 * Values are passed as Datum type, so comparisons can't be cheaper than
+	 * comparing a Datum value.
+	 *
+	 * FIXME I find this reasoning questionable. We may pass int2, and comparing
+	 * it is probably a bit cheaper than comparing a bigint.
 	 */
 	if (width <= sizeof(Datum))
 		return 1.0;
 
 	/*
-	 * Seems, cost of comparision is not directly proportional to args width,
-	 * because comparing args could be differ width (we known only average over
-	 * column) and difference often could be defined only by looking on first
-	 * bytes. So, use log16(width) as estimation.
+	 * We consider the cost of a comparison not to be directly proportional to
+	 * width of the argument, because widths of the arguments could be slightly
+	 * different (we only know the average width for the whole column). So we
+	 * use log16(width) as an estimate.
 	 */
 	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
 }
@@ -1837,11 +1840,11 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  * The main thing we need to calculate to estimate sort CPU costs is the number
  * of calls to the comparator functions. The difficulty is that for multi-column
  * sorts there may be different data types involved (for some of which the calls
- * may be much more expensive). Furthermore, the columns may have very different
+ * may be much more expensive). Furthermore, columns may have a very different
  * number of distinct values - the higher the number, the fewer comparisons will
  * be needed for the following columns.
  *
- * The algoritm is incremental - we add pathkeys one by one, and at each step we
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
  * estimate the number of necessary comparisons (based on the number of distinct
  * groups in the current pathkey prefix and the new pathkey), and the comparison
  * costs (which is data type specific).
@@ -1892,7 +1895,6 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	bool		has_fake_var = false;
 	int			i = 0;
 	Oid			prev_datatype = InvalidOid;
-	Cost		funcCost = 0.0;
 	List		*cache_varinfos = NIL;
 
 	/* fallback if pathkeys is unknown */
@@ -1901,12 +1903,8 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		/*
 		 * If we'll use a bounded heap-sort keeping just K tuples in memory, for
 		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 *
-		 * XXX I suppose the "quicksort factor" references to 1.5 at the end
-		 * of this function, but I'm not sure. I suggest we introduce some simple
-		 * constants for that, instead of magic values.
+		 * factor is a bit higher than for quicksort. Tweak it so that the cost
+		 * curve is continuous at the crossover point.
 		 */
 		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
 		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
@@ -1924,20 +1922,21 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	 */
 	foreach(lc, pathkeys)
 	{
-		PathKey				*pathkey = (PathKey*) lfirst(lc);
-		EquivalenceMember	*em;
-		double				 nGroups,
-							 correctedNGroups;
+		PathKey			   *pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember  *em;
+		double				nGroups,
+							correctedNGroups;
+		Cost				funcCost = 1.0;
 
 		/*
 		 * We believe that equivalence members aren't very different, so, to
-		 * estimate cost we take just first member
+		 * estimate cost we consider just the first member.
 		 */
 		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
 
 		if (em->em_datatype != InvalidOid)
 		{
-			/* do not lookup funcCost if data type is the same as previous */
+			/* do not look up funcCost if the data type is the same as before */
 			if (prev_datatype != em->em_datatype)
 			{
 				Oid			sortop;
@@ -1950,54 +1949,56 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 				cost.startup = 0;
 				cost.per_tuple = 0;
 				add_function_cost(root, get_opcode(sortop), NULL, &cost);
-				/* we need procost, not product of procost and cpu_operator_cost */
+
+				/*
+				 * add_function_cost returns the product of cpu_operator_cost
+				 * and procost, but we need just procost, so undo that.
+				 */
 				funcCost = cost.per_tuple / cpu_operator_cost;
+
 				prev_datatype = em->em_datatype;
 			}
 		}
-		else
-			funcCost = 1.0; /* fallback */
 
-		/* Try to take into account actual width fee */
+		/* factor in the width of the values in this column */
 		funcCost *= get_width_cost_multiplier(root, em->em_expr);
 
+		/* now we have per-key cost, so add to the running total */
 		totalFuncCost += funcCost;
 
-		/* Remember if we have a fake var in pathkeys */
+		/* remember if we have found a fake Var in pathkeys */
 		has_fake_var |= is_fake_var(em->em_expr);
 		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
 
 		/*
-		 * Prevent call estimate_num_groups() with fake Var. Note,
-		 * pathkeyExprs contains only previous columns
+		 * We need to calculate the number of comparisons for this column, which
+		 * requires knowing the group size. So we estimate the number of groups
+		 * by calling estimate_num_groups_incremental(), which estimates the
+		 * group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars, so
+		 * fall back to a default estimate in that case.
 		 */
-		if (has_fake_var == false)
-			/*
-			 * Recursively compute number of groups in a group from previous step
-			 */
+		if (!has_fake_var)
 			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
 													  tuplesPerPrevGroup, NULL, NULL,
 													  &cache_varinfos,
 													  list_length(pathkeyExprs) - 1);
 		else if (tuples > 4.0)
 			/*
-			 * Use geometric mean as estimation if there is no any stats.
-			 * Don't use DEFAULT_NUM_DISTINCT because it used for only one
-			 * column while here we try to estimate number of groups over
-			 * set of columns.
-			 *
-			 * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to
-			 * limit the calculated values, somehow?
+			 * Use the geometric mean as the estimate if there are no stats.
 			 *
-			 * XXX What's the logic of the following formula?
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
 			 */
 			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
 		else
 			nGroups = tuples;
 
 		/*
-		 * Presorted keys aren't participated in comparison but still checked
-		 * by qsort comparator.
+		 * Presorted keys are not considered in the cost above, but we still
+		 * have to compare them in the qsort comparator. So make sure to factor
+		 * in that cost here.
 		 */
 		if (i >= nPresortedKeys)
 		{
@@ -2006,10 +2007,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 				double heap_tuples;
 
 				/* have to keep at least one group, and a multiple of group size */
+				/* FIXME Do we actually need the Max here? Surely ceil() returns
+				 * at least 1.0, no? */
 				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
 								  tuplesPerPrevGroup);
 
 				/* so how many (whole) groups is that? */
+				/* FIXME so why not just ceil(output_tuples / tuplesPerPrevGroup)? */
 				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
 			}
 			else
@@ -2024,17 +2028,19 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		i++;
 
 		/*
-		 * Real-world distribution isn't uniform but now we don't have a way to
-		 * determine that, so, add multiplier to get closer to worst case.
-		 * But ensure the number of tuples does not exceed the group size in the
-		 * preceding step.
+		 * Uniform distributions with all groups being of the same size are the
+		 * best case, with nice smooth behavior. Real-world distributions tend
+		 * not to be uniform, though, and we don't have any reliable easy-to-use
+		 * information. As a basic defense against skewed distributions, we use
+		 * a 1.5 factor to make the expected group a bit larger, but we need to
+		 * be careful not to make the group larger than in the preceding step.
 		 */
 		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
 								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
 
 		/*
-		 * We could skip all following columns for cost estimation, because we
-		 * believe that tuples are unique by the set of previous columns
+		 * Once we get a single-row group, the tuples in the group are unique
+		 * and we can skip all remaining columns.
 		 */
 		if (tuplesPerPrevGroup <= 1.0)
 			break;
@@ -2049,12 +2055,12 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	 * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles E.
 	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort estimation
 	 * formula has additional term proportional to number of tuples (See Chapter
-	 * 8.2 and Theorem 4.1). It has meaning with low number of tuples,
-	 * approximately less that 1e4. Of course, it could be implemented as
-	 * additional multiplier under logarithm, but use more complicated formula
-	 * which takes into account number of unique tuples and it isn't clear how
-	 * to combine multiplier with groups. Estimate it as 10 in cpu_operator_cost
-	 * unit.
+	 * 8.2 and Theorem 4.1). That affects cases with a low number of tuples,
+	 * approximately less than 1e4. We could implement it as an additional
+	 * multiplier under the logarithm, but we use a bit more complex formula
+	 * which takes into account the number of unique tuples and it's not clear
+	 * how to combine the multiplier with the number of groups. Estimate it as
+	 * 10 in cpu_operator_cost units.
 	 */
 	per_tuple_cost += 10 * cpu_operator_cost;
 
@@ -2110,7 +2116,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
  * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
- * be added by caller later.
+ * be added by caller later
  */
 static void
 cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 5487ae2ee4c..258302840f8 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -685,9 +685,9 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 				/*
 				 * Match!
 				 *
-				 * Copy sortref if it wasn't set yet, it's possible if ec was
-				 * constructed from WHERE clause, ie it doesn't have target
-				 * reference at all
+				 * Copy the sortref if it wasn't set yet. That may happen if the
+				 * ec was constructed from a WHERE clause, i.e. it doesn't have a
+				 * target reference at all.
 				 */
 				if (cur_ec->ec_sortref == 0 && sortref > 0)
 					cur_ec->ec_sortref = sortref;
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index fcc99f0fb1c..7913d62625d 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -362,12 +362,12 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 
 	/*
 	 * Walk the pathkeys (determining ordering of the input path) and see if
-	 * there's a matching GROUP BY key. If we find one, we append if to the
+	 * there's a matching GROUP BY key. If we find one, we append it to the
 	 * list, and do the same for the clauses.
 	 *
-	 * Once we find first pathkey without a matching GROUP BY key, the rest of
-	 * the pathkeys is useless and can't be used to evaluate the grouping, so
-	 * we abort the loop and ignore the remaining pathkeys.
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping,
+	 * so we abort the loop and ignore the remaining pathkeys.
 	 *
 	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
 	 */
@@ -391,8 +391,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* XXX maybe return when (n == 0) */
-
 	/* append the remaining group pathkeys (will be treated as not sorted) */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
@@ -530,7 +528,7 @@ PathkeyMutatorNext(PathkeyMutatorState *state)
 	}
 
 	/* update the list cells to point to the right elements */
-	for(i=0; i<state->mutatorNColumns; i++)
+	for(i = 0; i < state->mutatorNColumns; i++)
 		lfirst(state->elemCells[i]) =
 			(void *) state->elems[ state->positions[i] - 1 ];
 
@@ -597,18 +595,18 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 
 	/*
 	 * We could exhaustively cost all possible orderings of the pathkeys, but for
-	 * large number of pathkeys that might be prohibitively expensive. So we try
-	 * to apply a simple cheap heuristics first - we sort the pathkeys by sort
-	 * cost (as if the pathkey was sorted independently) and then check only the
-	 * four cheapest pathkeys. The remaining pathkeys are kept ordered by cost.
+	 * a large number of pathkeys it might be prohibitively expensive. So we try
+	 * to apply simple cheap heuristics first - we sort the pathkeys by sort cost
+	 * (as if the pathkey was sorted independently) and then check only the four
+	 * cheapest pathkeys. The remaining pathkeys are kept ordered by cost.
 	 *
-	 * XXX This is a very simple heuristics, and likely to work fine for most
-	 * cases (because number of GROUP BY clauses tends to be lower than 4). But
-	 * it ignores how the number of distinct values in each pathkey affects the
-	 * following sorts. It may be better to use "more expensive" pathkey first
-	 * if it has many distinct values, because it then limits the number of
-	 * comparisons for the remaining pathkeys. But evaluating that is kinda the
-	 * expensive bit we're trying to not do.
+	 * XXX This is a very simple heuristic, but likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than 4).
+	 * But it ignores how the number of distinct values in each pathkey affects
+	 * the following steps. It might be better to use a "more expensive" pathkey
+	 * first if it has many distinct values, because it then limits the number
+	 * of comparisons for the remaining pathkeys. But evaluating that is likely
+	 * quite expensive.
 	 */
 	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
 	nToPermute = 4;
@@ -649,11 +647,7 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 	}
 	else
 	{
-		/*
-		 * Since v13 list_free() can clean list elements so for original list
-		 * not to be modified it should be copied to a new one which can then
-		 * be cleaned safely if needed.
-		 */
+		/* Copy the list, so that we can later free the new list with list_free(). */
 		new_group_pathkeys = list_copy(*group_pathkeys);
 		nToPermute = nFreeKeys;
 	}
@@ -667,18 +661,16 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 	 * there's room for two dynamic programming optimizations here. Firstly, we
 	 * may pass the current "best" cost to cost_sort_estimate so that it can
 	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could pass
-	 * return information about the position when it exceeded the cost, and we
-	 * could skip all permutations with the same prefix.
+	 * back information about the position where it exceeded the cost, and
+	 * we could skip all permutations with the same prefix.
 	 *
 	 * Imagine we've already found ordering with cost C1, and we're evaluating
 	 * another ordering - cost_sort_estimate() calculates cost by adding the
 	 * pathkeys one by one (more or less), and the cost only grows. If at any
 	 * point it exceeds C1, it can't possibly be "better" so we can discard it.
 	 * But we also know that we can discard all ordering with the same prefix,
-	 * because if we're estimating (a,b,c,d) and we exceeded C1 at (a,b) then
-	 * the same thing will happen for any ordering with this prefix.
-	 *
-	 *
+	 * because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) then the
+	 * same thing will happen for any ordering with this prefix.
 	 */
 	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
 
@@ -721,7 +713,7 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
  * Returns list of PathKeyInfo items, each representing an interesting ordering
- * of GROUP BY keys. Each items stores pathkeys and clauses in matching order.
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
  *
  * The function considers (and keeps) multiple group by orderings:
  *
@@ -730,15 +722,15 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
  * - GROUP BY keys reordered to minimize the sort cost
  *
  * - GROUP BY keys reordered to match path ordering (as much as possible), with
- *   the tail reoredered to minimize the sort cost
+ *   the tail reordered to minimize the sort cost
  *
  * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
- *   the tail reoredered to minimize the sort cost
+ *   the tail reordered to minimize the sort cost
  *
  * There are other potentially interesting orderings (e.g. it might be best to
  * match the first ORDER BY key, order the remaining keys differently and then
- * rely on incremental sort to fix this), but we ignore those for now. To make
- * this work we'd have to pretty much generate all possible permutations.
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
@@ -761,8 +753,9 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 	infos = lappend(infos, info);
 
 	/*
-	 * If the optimization is disabled, we consider only the pathkey order as
-	 * specified in the query. We don't do any reorderings.
+	 * Should we try generating alternative orderings of the group keys? If not,
+	 * we produce only the order specified in the query, i.e. the optimization
+	 * is effectively disabled.
 	 */
 	if (!enable_group_by_reordering)
 		return infos;
@@ -826,9 +819,6 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 	/*
 	 * Try reordering pathkeys to minimize the sort cost (this time consider
 	 * the ORDER BY clause, but only if set debug_group_by_match_order_by).
-	 *
-	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
-	 * info in this case.
 	 */
 	if (root->sort_pathkeys)
 	{
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 36c303e45c5..fb4fb987e7f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3294,10 +3294,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 }
 
 /*
- * estimate_num_groups/estimate_num_groups_incremental
- *		- Estimate number of groups in a grouped query.
- *		  _incremental variant is performance optimization for
- *		  case of adding one-by-one column
+ * estimate_num_groups		- Estimate number of groups in a grouped query
  *
  * Given a query having a GROUP BY clause, estimate how many groups there
  * will be --- ie, the number of distinct combinations of the GROUP BY
@@ -3376,6 +3373,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 										   NULL, 0);
 }
 
+/*
+ * estimate_num_groups_incremental
+ *		An estimate_num_groups variant, optimized for cases that are adding the
+ *		expressions incrementally (e.g. one by one).
+ */
 double
 estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
 					double input_rows,
@@ -3386,7 +3388,8 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i, j;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
-- 
2.34.1

0005-fixup-minor-simplification-20220328.patch (text/x-patch)
From 23c94d59b410930c1a0187c1c8eb131995ba3ccc Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 29 Mar 2022 02:26:13 +0200
Subject: [PATCH 5/7] fixup: minor simplification

---
 src/backend/optimizer/path/costsize.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index bb0fe0c22ee..3e1daf8b893 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2004,17 +2004,8 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		{
 			if (heapSort)
 			{
-				double heap_tuples;
-
 				/* have to keep at least one group, and a multiple of group size */
-				/* FIXME Do we actually need the Max, here? Surely the ceil() returns
-				 * at least 1.0 here, no? */
-				heap_tuples = Max(ceil(output_tuples / tuplesPerPrevGroup) * tuplesPerPrevGroup,
-								  tuplesPerPrevGroup);
-
-				/* so how many (whole) groups is that? */
-				/* FIXME so why not just ceil(putput_tuples / tuplesPerPrevGroup)? */
-				correctedNGroups = ceil(heap_tuples / tuplesPerPrevGroup);
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
 			}
 			else
 				/* all groups in the input */
-- 
2.34.1

0006-fixup-add-GUC-to-docs-20220328.patch (text/x-patch)
From 31a138efbb203caebcfe8636fd7b873c3362208e Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Tue, 29 Mar 2022 02:45:52 +0200
Subject: [PATCH 6/7] fixup: add GUC to docs

---
 doc/src/sgml/config.sgml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 05df48131d7..0c0de61b855 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4961,6 +4961,20 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering">
+      <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables reordering of keys in the <literal>GROUP BY</literal>
+        clause. The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
       <term><varname>enable_hashagg</varname> (<type>boolean</type>)
       <indexterm>
-- 
2.34.1

0007-demonstrate-disabled-optimization-20220328.patch (text/x-patch)
From 4dd50166459d6fb1c131d215cf79237dfdfd4b05 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Mon, 28 Mar 2022 23:31:29 +0200
Subject: [PATCH 7/7] demonstrate disabled optimization

tweak tests to show costing, disable optimization to show what changes
---
 .../postgres_fdw/expected/postgres_fdw.out    |   4 +-
 contrib/postgres_fdw/sql/postgres_fdw.sql     |   2 +-
 src/backend/optimizer/path/pathkeys.c         |   2 +-
 src/backend/utils/misc/guc.c                  |   2 +-
 src/test/regress/expected/aggregates.out      | 168 +++++++++---------
 src/test/regress/sql/aggregates.sql           |  26 +--
 6 files changed, 102 insertions(+), 102 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 2ffc836824a..bd826eaedd5 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2739,11 +2739,11 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 (9 rows)
 
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
-explain (verbose, costs off)
+explain (verbose, costs on)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
                                                  QUERY PLAN                                                 
 ------------------------------------------------------------------------------------------------------------
- Foreign Scan
+ Foreign Scan  (cost=110.50..132.85 rows=10 width=52)
    Output: (count(c2)), c2, 5, 7.0, 9
    Relations: Aggregate on (public.ft1)
    Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 95b6b7192e6..61b04d6f9a0 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -732,7 +732,7 @@ explain (verbose, costs off)
 select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::int order by 1;
 
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
-explain (verbose, costs off)
+explain (verbose, costs on)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 7913d62625d..9488798d445 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -32,7 +32,7 @@
 #include "utils/selfuncs.h"
 
 /* Consider reordering of GROUP BY keys? */
-bool enable_group_by_reordering = true;
+bool enable_group_by_reordering = false;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 9e8ab1420d9..ebc9904487f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1189,7 +1189,7 @@ static struct config_bool ConfigureNamesBool[] =
 			GUC_EXPLAIN
 		},
 		&enable_group_by_reordering,
-		true,
+		false,
 		NULL, NULL, NULL
 	},
 	{
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 601047fa3dd..26ea59f015e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1206,35 +1206,35 @@ select min(f1), max(f1) from minmaxtest;
 (1 row)
 
 -- DISTINCT doesn't do anything useful here, but it shouldn't fail
-explain (costs off)
+explain (costs on)
   select distinct min(f1), max(f1) from minmaxtest;
-                                         QUERY PLAN                                          
----------------------------------------------------------------------------------------------
- HashAggregate
+                                                            QUERY PLAN                                                             
+-----------------------------------------------------------------------------------------------------------------------------------
+ HashAggregate  (cost=1.40..1.41 rows=1 width=8)
    Group Key: $0, $1
    InitPlan 1 (returns $0)
-     ->  Limit
-           ->  Merge Append
+     ->  Limit  (cost=0.64..0.69 rows=1 width=4)
+           ->  Merge Append  (cost=0.64..394.50 rows=7865 width=4)
                  Sort Key: minmaxtest.f1
-                 ->  Index Only Scan using minmaxtesti on minmaxtest minmaxtest_1
+                 ->  Index Only Scan using minmaxtesti on minmaxtest minmaxtest_1  (cost=0.14..16.59 rows=254 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan using minmaxtest1i on minmaxtest1 minmaxtest_2
+                 ->  Index Only Scan using minmaxtest1i on minmaxtest1 minmaxtest_2  (cost=0.15..92.55 rows=2537 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest_3
+                 ->  Index Only Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest_3  (cost=0.15..92.55 rows=2537 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan using minmaxtest3i on minmaxtest3 minmaxtest_4
+                 ->  Index Only Scan using minmaxtest3i on minmaxtest3 minmaxtest_4  (cost=0.14..74.79 rows=2537 width=4)
    InitPlan 2 (returns $1)
-     ->  Limit
-           ->  Merge Append
+     ->  Limit  (cost=0.64..0.69 rows=1 width=4)
+           ->  Merge Append  (cost=0.64..394.50 rows=7865 width=4)
                  Sort Key: minmaxtest_5.f1 DESC
-                 ->  Index Only Scan Backward using minmaxtesti on minmaxtest minmaxtest_6
+                 ->  Index Only Scan Backward using minmaxtesti on minmaxtest minmaxtest_6  (cost=0.14..16.59 rows=254 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan Backward using minmaxtest1i on minmaxtest1 minmaxtest_7
+                 ->  Index Only Scan Backward using minmaxtest1i on minmaxtest1 minmaxtest_7  (cost=0.15..92.55 rows=2537 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
+                 ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8  (cost=0.15..92.55 rows=2537 width=4)
                        Index Cond: (f1 IS NOT NULL)
-                 ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Result
+                 ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9  (cost=0.14..74.79 rows=2537 width=4)
+   ->  Result  (cost=1.38..1.39 rows=1 width=8)
 (25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
@@ -2454,7 +2454,7 @@ SELECT
 	format('%60s', i%2) AS v,
 	i/4 AS c,
 	i/8 AS d,
-	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	mod(i * 7703, 10000/8)::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
@@ -2464,127 +2464,127 @@ ANALYZE btg;
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, v;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1240.79..1390.80 rows=5001 width=73)
    Group Key: p, v
-   ->  Sort
+   ->  Sort  (cost=1240.79..1265.79 rows=10000 width=65)
          Sort Key: p, v
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=65)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1240.79..1390.80 rows=5001 width=73)
    Group Key: p, v
-   ->  Sort
+   ->  Sort  (cost=1240.79..1265.79 rows=10000 width=65)
          Sort Key: p, v
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=65)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, c;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1335.43..1510.44 rows=5001 width=77)
    Group Key: p, c, v
-   ->  Sort
+   ->  Sort  (cost=1335.43..1360.43 rows=10000 width=69)
          Sort Key: p, c, v
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=69)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1759.31..1934.32 rows=5001 width=77)
    Group Key: v, p, c
-   ->  Sort
+   ->  Sort  (cost=1759.31..1784.31 rows=10000 width=69)
          Sort Key: v, p, c
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=69)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1460.43..1660.44 rows=5001 width=81)
    Group Key: p, d, c, v
-   ->  Sort
+   ->  Sort  (cost=1460.43..1485.43 rows=10000 width=73)
          Sort Key: p, d, c, v
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=73)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1893.47..2093.48 rows=5001 width=81)
    Group Key: v, p, d, c
-   ->  Sort
+   ->  Sort  (cost=1893.47..1918.47 rows=10000 width=73)
          Sort Key: v, p, d, c
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=73)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
-          QUERY PLAN          
-------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1484.11..1684.12 rows=5001 width=81)
    Group Key: p, v, d, c
-   ->  Sort
+   ->  Sort  (cost=1484.11..1509.11 rows=10000 width=73)
          Sort Key: p, v, d, c
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=73)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, d, e;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1326.27..1501.28 rows=5001 width=20)
    Group Key: p, d, e
-   ->  Sort
+   ->  Sort  (cost=1326.27..1351.27 rows=10000 width=12)
          Sort Key: p, d, e
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=12)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, e, d;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1326.27..1501.28 rows=5001 width=20)
    Group Key: p, e, d
-   ->  Sort
+   ->  Sort  (cost=1326.27..1351.27 rows=10000 width=12)
          Sort Key: p, e, d
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=12)
 (5 rows)
 
 CREATE STATISTICS btg_dep ON d, e, p FROM btg;
 ANALYZE btg;
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, d, e;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1326.27..1551.27 rows=10000 width=20)
    Group Key: p, d, e
-   ->  Sort
+   ->  Sort  (cost=1326.27..1351.27 rows=10000 width=12)
          Sort Key: p, d, e
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=12)
 (5 rows)
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, e, d;
-         QUERY PLAN          
------------------------------
- GroupAggregate
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ GroupAggregate  (cost=1326.27..1551.27 rows=10000 width=20)
    Group Key: p, e, d
-   ->  Sort
+   ->  Sort  (cost=1326.27..1351.27 rows=10000 width=12)
          Sort Key: p, e, d
-         ->  Seq Scan on btg
+         ->  Seq Scan on btg  (cost=0.00..243.00 rows=10000 width=12)
 (5 rows)
 
 -- GROUP BY optimization by reorder columns by index scan
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index c6e0d7ba2b7..671a9daa696 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -425,7 +425,7 @@ explain (costs off)
 select min(f1), max(f1) from minmaxtest;
 
 -- DISTINCT doesn't do anything useful here, but it shouldn't fail
-explain (costs off)
+explain (costs on)
   select distinct min(f1), max(f1) from minmaxtest;
 select distinct min(f1), max(f1) from minmaxtest;
 
@@ -1033,7 +1033,7 @@ SELECT
 	format('%60s', i%2) AS v,
 	i/4 AS c,
 	i/8 AS d,
-	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	mod(i * 7703, 10000/8)::int as e --the same as d but no correlation with p
 	INTO btg
 FROM
 	generate_series(1, 10000) i;
@@ -1047,40 +1047,40 @@ SET enable_hashagg=off;
 SET max_parallel_workers= 0;
 SET max_parallel_workers_per_gather = 0;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, v;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, c;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, d, e;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, e, d;
 
 CREATE STATISTICS btg_dep ON d, e, p FROM btg;
 ANALYZE btg;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, d, e;
 
-EXPLAIN (COSTS off)
+EXPLAIN (COSTS on)
 SELECT count(*) FROM btg GROUP BY p, e, d;
 
 
-- 
2.34.1

#69Zhihong Yu
zyu@yugabyte.com
In reply to: Tomas Vondra (#68)
Re: POC: GROUP BY optimization

On Mon, Mar 28, 2022 at 5:49 PM Tomas Vondra <tomas.vondra@enterprisedb.com>
wrote:

Hi,

Here's a rebased/improved version of the patch, with smaller parts
addressing various issues. There are seven parts:

0001 - main part, just rebased

0002 - replace the debug GUC options with a single GUC to disable the
optimization if needed

0003 - minor code cleanup, removal of unnecessary variable

0004 - various comment fixes (rewordings, typos, ...)

0005 - a minor code simplification, addressing FIXMEs from 0004

0006 - adds the new GUC to the docs

0007 - demonstrates plan changes with a disabled optimization

The first 6 parts should be squashed and committed at one, I only kept
them separate for clarity. The 0007 is merely a demonstration of the new
GUC and that it disables the optimization.

Agree. Because it is a kind of automation, we should allow the user to
switch it off in case of problems or for manual tuning.

Also, I looked through this patch. It has some minor problems:

1. Multiple typos in the patch comment.

I went through the comments and checked all of them for grammar mistakes
and typos using a word processor, so hopefully that should be OK. But
maybe there's still something wrong.

2. The term 'cardinality of a key' - may be replace with 'number of
duplicates'?

No, cardinality means "number of distinct values", so "duplicates" would
be wrong. And I think "cardinality" is a well-established term, so I think
it's fine.
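To make the intuition concrete, here is a small standalone C sketch (a toy model, not PostgreSQL code; the struct, names and data distribution are made up) that counts how often a two-key sort comparator must fall through to the second key. With the high-cardinality key first, the tie-breaker is almost never evaluated:

```c
#include <assert.h>
#include <stdlib.h>

/* Toy model (not PostgreSQL code): rows with one high-cardinality key
 * (~10000 distinct values) and one low-cardinality key (2 values). */
typedef struct { int hi; int lo; } Row;

static long tiebreak_calls;     /* how often the 2nd key was compared */

static int int3way(int a, int b) { return (a > b) - (a < b); }

static int cmp_hi_first(const void *pa, const void *pb)
{
    const Row *a = pa, *b = pb;
    int c = int3way(a->hi, b->hi);
    if (c != 0)
        return c;
    tiebreak_calls++;
    return int3way(a->lo, b->lo);
}

static int cmp_lo_first(const void *pa, const void *pb)
{
    const Row *a = pa, *b = pb;
    int c = int3way(a->lo, b->lo);
    if (c != 0)
        return c;
    tiebreak_calls++;
    return int3way(a->hi, b->hi);
}

/* Sort 10000 rows with the given key order; return how many times the
 * comparator had to fall through to the second key. */
long count_tiebreaks(int hi_first)
{
    enum { N = 10000 };
    Row *rows = malloc(N * sizeof(Row));

    for (int i = 0; i < N; i++)
    {
        rows[i].hi = (i * 7703) % N;    /* gcd(7703, N) = 1: all distinct */
        rows[i].lo = i % 2;             /* only two distinct values */
    }
    tiebreak_calls = 0;
    qsort(rows, N, sizeof(Row), hi_first ? cmp_hi_first : cmp_lo_first);
    free(rows);
    return tiebreak_calls;
}
```

Sorting with the low-cardinality key first forces thousands of second-key comparisons; with the (unique) high-cardinality key first, the first comparison always decides.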

BTW I named the GUC enable_group_by_reordering, I wonder if it should be
named differently, e.g. enable_groupby_reordering? Opinions?

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Hi,

For 0001-Optimize-order-of-GROUP-BY-keys-20220328.patch:

multiple parametes need to be

parametes -> parameters

leave more expensive comparions

comparions -> comparisons

+ if (has_fake_var == false)

The above can be written as:

if (!has_fake_var)

+ nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) /
list_length(pathkeys));

Looks like the value of tuples doesn't change inside the loop.
You can precompute sqrt(tuples) outside the loop and store the value in a
variable.

+       return -1;
+   else if (a->cost == b->cost)
+       return 0;
+   return 1;

the keyword 'else' is not needed.

+ * Returns newly allocated lists. If no reordering is possible (or needed),
+ * the lists are set to NIL.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,

It seems the comment for return value doesn't match the bool return type.

+   /* If this optimization is disabled, we're done. */
+   if (!debug_cheapest_group_by)

It seems enable_cheapest_group_by would be better name for the flag.

Cheers

#70Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Zhihong Yu (#69)
1 attachment(s)
Re: POC: GROUP BY optimization

On 3/29/22 04:02, Zhihong Yu wrote:

On Mon, Mar 28, 2022 at 5:49 PM Tomas Vondra
<tomas.vondra@enterprisedb.com>
wrote:

Hi,

Here's a rebased/improved version of the patch, with smaller parts
addressing various issues. There are seven parts:

0001 - main part, just rebased

0002 - replace the debug GUC options with a single GUC to disable the
       optimization if needed

0003 - minor code cleanup, removal of unnecessary variable

0004 - various comment fixes (rewordings, typos, ...)

0005 - a minor code simplification, addressing FIXMEs from 0004

0006 - adds the new GUC to the docs

0007 - demonstrates plan changes with a disabled optimization

The first 6 parts should be squashed and committed at one, I only kept
them separate for clarity. The 0007 is merely a demonstration of the new
GUC and that it disables the optimization.

Agree. Because it is a kind of automation, we should allow the user to
switch it off in case of problems or for manual tuning.

Also, I looked through this patch. It has some minor problems:

1. Multiple typos in the patch comment.

I went through the comments and checked all of them for grammar mistakes
and typos using a word processor, so hopefully that should be OK. But
maybe there's still something wrong.

2. The term 'cardinality of a key' - may be replace with 'number of
duplicates'?

No, cardinality means "number of distinct values", so "duplicates" would
be wrong. And I think "cardinality" is a well-established term, so I think
it's fine.

BTW I named the GUC enable_group_by_reordering, I wonder if it should be
named differently, e.g. enable_groupby_reordering? Opinions?

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Hi,

For 0001-Optimize-order-of-GROUP-BY-keys-20220328.patch:

multiple parametes need to be

  parametes -> parameters

leave more expensive comparions

  comparions -> comparisons

+       if (has_fake_var == false)

The above can be written as:

       if (!has_fake_var)

All of this was already fixed in one of the subsequent "fixup" patches.
Attached is a patch merging 0001-0006, which is what I propose to
commit.

+           nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) /
list_length(pathkeys));

Looks like the value of tuples doesn't change inside the loop.
You can precompute sqrt(tuples) outside the loop and store the value in
a variable.

IMHO it makes the formula harder to read, and the effect is not going to
be measurable - we're processing only a couple elements. If the loop was
~100 iterations or more, maybe it'd have impact.
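For reference, here is a standalone sketch of the two formulations being discussed (hand-rolled `sqrt`/`ceil` stand-ins so the snippet needs no libm; the function names are made up, not PostgreSQL's). It shows that hoisting the loop-invariant `sqrt(tuples)` changes only readability, not the estimate:

```c
#include <assert.h>

/* Hand-rolled stand-ins for sqrt()/ceil() so this sketch needs no
 * libm; the real code of course uses <math.h>. */
static double my_sqrt(double x)
{
    double g = (x > 1.0) ? x : 1.0;
    for (int it = 0; it < 64; it++)     /* Newton's method */
        g = 0.5 * (g + x / g);
    return g;
}

static double my_ceil(double x)
{
    long n = (long) x;
    return (x > (double) n) ? (double) (n + 1) : (double) n;
}

/* The estimate as written in the patch: sqrt(tuples) is recomputed
 * in every iteration of the loop over pathkeys. */
double ngroups_estimate(double tuples, int i, int nkeys)
{
    return my_ceil(2.0 + my_sqrt(tuples) * (i + 1) / nkeys);
}

/* The reviewer's variant: the loop-invariant sqrt(tuples) is computed
 * once and passed in.  Same result; only readability vs. a (tiny)
 * amount of repeated work differs. */
double ngroups_estimate_hoisted(double sqrt_tuples, int i, int nkeys)
{
    return my_ceil(2.0 + sqrt_tuples * (i + 1) / nkeys);
}
```

Both variants produce identical estimates for every key position, which is why the choice is purely a matter of style for a loop over a handful of pathkeys.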

+       return -1;
+   else if (a->cost == b->cost)
+       return 0;
+   return 1;

the keyword 'else' is not needed.

True, but this is how comparators are usually implemented.
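For illustration, the conventional shape in question looks like this (a sketch with a hypothetical struct name, not the patch's actual type):

```c
#include <assert.h>

/* Hypothetical stand-in for the patch's per-ordering info struct; only
 * the cost field matters for the comparator shape discussed above. */
typedef struct { double cost; } OrderingInfo;

/* The conventional three-way comparator idiom: the "else" after a
 * return is indeed redundant, but spelling out all three branches
 * makes the <0 / ==0 / >0 qsort contract explicit, which is why
 * comparators are often written this way. */
int ordering_info_cmp(const void *pa, const void *pb)
{
    const OrderingInfo *a = pa;
    const OrderingInfo *b = pb;

    if (a->cost < b->cost)
        return -1;
    else if (a->cost == b->cost)
        return 0;
    return 1;
}
```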

+ * Returns newly allocated lists. If no reordering is possible (or needed),
+ * the lists are set to NIL.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,

It seems the comment for return value doesn't match the bool return type.

Yup, this is a valid point. I've fixed/reworded the comment a bit.

+   /* If this optimization is disabled, we're done. */
+   if (!debug_cheapest_group_by)

It seems enable_cheapest_group_by would be better name for the flag.

This was renamed to enable_group_by_reordering in one of the fixup patches.

Attached is a patch merging 0001 and all the fixup patches, i.e. the
patch I plan to commit.

There was a minor test failure - the new GUC was not added to the sample
config file, so 003_check_guc.pl was failing.

I'm not including the 0007 part, because that's meant to demonstrate
plan changes with disabled optimization, which would confuse cfbot.

regards


--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

0001-Optimize-order-of-GROUP-BY-keys-20220329.patch (text/x-patch)
From 66835c4ae52dc61d580b84ab15004c8f46919550 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Fri, 21 Jan 2022 20:22:14 +0100
Subject: [PATCH 1/2] Optimize order of GROUP BY keys

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may be heavily dependent on the order in which the keys are
compared when building the groups. Grouping does not imply any ordering,
so we're allowed to compare the keys in arbitrary order, and a Hash Agg
leverages this. But for Group Agg, we simply compared keys in the order
as specified in the query. This commit explores alternative ordering of
the keys, trying to find a cheaper one.

To pick the optimal order of comparisons, multiple parameters need to be
considered, both local and global ones. Intuitively, it's better to
leave more expensive comparisons (for complex data types etc.) at the
very end, because maybe one of the earlier comparisons will be enough to
make a decision, and the expensive comparison will be unnecessary. The
other local parameter is cardinality - the higher the cardinality of a
key, the lower the probability we'll need to compare additional keys. We
explore possible orderings by a combination of exhaustive and greedy
approach, and pick the cheapest ordering.

But there are global parameters too, in the sense that we can't fully
evaluate them at the point when creating the sort. For example, there
might be an ORDER BY clause, or another ordering-dependent operation,
higher up in the query plan. The path may be already sorted by (some
of) the keys, in which case we can either use incremental sort or even
skip the sort entirely. So we consider various additional pathkeys in
addition to the "cheapest comparison" order.

The original patch was proposed by Teodor Sigaev, and later reworked and
improved by Dmitry Dolgov. Reviews by a number of people, including me,
Andrey Lepikhov, Claudio Freire and Ibrar Ahmed.

Author: Dmitry Dolgov, Teodor Sigaev, Tomas Vondra
Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed, Zhihong Yu
Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 doc/src/sgml/config.sgml                      |  14 +
 src/backend/optimizer/path/costsize.c         | 366 +++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 569 +++++++++++++++
 src/backend/optimizer/plan/planner.c          | 649 ++++++++++--------
 src/backend/optimizer/util/pathnode.c         |   2 +-
 src/backend/utils/adt/selfuncs.c              |  38 +-
 src/backend/utils/misc/guc.c                  |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |   7 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 .../regress/expected/partition_aggregate.out  | 136 ++--
 src/test/regress/expected/partition_join.out  |  75 +-
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 24 files changed, 1882 insertions(+), 494 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index f210f911880..2ffc836824a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2741,16 +2741,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 05df48131d7..0c0de61b855 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4961,6 +4961,20 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering">
+      <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables reordering of keys in a <literal>GROUP BY</literal>
+        clause. The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
       <term><varname>enable_hashagg</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1b07ea392d9..3e1daf8b893 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1755,6 +1755,322 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0 that would cause estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns relative complexity of comparing two values based on their width.
+ * The underlying idea is that the comparison becomes more expensive the longer
+ * the value is. The return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double	width = -1.0; /* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		*var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo	*rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int	ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, try using type width instead. */
+	if (width < 0.0)
+	{
+		Node	*node = (Node*) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Values are passed as Datum type, so comparisons can't be cheaper than
+	 * comparing a Datum value.
+	 *
+	 * FIXME I find this reasoning questionable. We may pass int2, and comparing
+	 * it is probably a bit cheaper than comparing a bigint.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * We consider the cost of a comparison not to be directly proportional to
+	 * width of the argument, because widths of the arguments could be slightly
+	 * different (we only know the average width for the whole column). So we
+	 * use log16(width) as an estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because we currently have no estimate of the
+ * group sizes, only the estimated number of groups (distinct values). In that
+ * case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the cost of comparing values in column k; then the total
+ * cost of the sort is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: Some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, we fall back to a
+ * simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell	*lc;
+	List		*pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	List		*cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort. Tweak it so that the cost
+		 * curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing the total cost of sorting takes into account:
+	 * - the per-column comparison function cost
+	 * - the estimated number of comparisons needed per column
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey*) lfirst(lc);
+		EquivalenceMember  *em;
+		double				nGroups,
+							correctedNGroups;
+		Cost				funcCost = 1.0;
+
+		/*
+		 * We believe that equivalence members aren't very different, so to
+		 * estimate the cost we consider just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not look up funcCost again if the data type is the same */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+				/*
+				 * add_function_cost returns the product of cpu_operator_cost
+				 * and procost, but we need just procost, so undo that.
+				 */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+
+				prev_datatype = em->em_datatype;
+			}
+		}
+
+		/* factor in the width of the values in this column */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		/* now we have per-key cost, so add to the running total */
+		totalFuncCost += funcCost;
+
+		/* remember if we have found a fake Var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * We need to calculate the number of comparisons for this column, which
+		 * requires knowing the group size. So we estimate the number of groups
+		 * by calling estimate_num_groups_incremental(), which estimates the
+		 * group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars, so
+		 * fall back to a default estimate in that case.
+		 */
+		if (!has_fake_var)
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 *
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys are not considered in the cost above, but we still do
+		 * have to compare them in the qsort comparator. So make sure to factor
+		 * in the cost in that case.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				/* have to keep at least one group, and a multiple of group size */
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Uniform distributions with all groups being of the same size are the
+		 * best case, with nice smooth behavior. Real-world distributions tend
+		 * not to be uniform, though, and we don't have any reliable easy-to-use
+		 * information. As a basic defense against skewed distributions, we use
+		 * a 1.5 factor to make the expected group a bit larger, but we need to
+		 * be careful not to make the group larger than in the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+		/*
+		 * Once we get single-row group, it means tuples in the group are unique
+		 * and we can skip all remaining columns.
+		 */
+		if (tuplesPerPrevGroup <= 1.0)
+			break;
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+	 * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). That affects cases with a low
+	 * number of tuples, approximately less than 1e4. We could implement it as
+	 * an additional multiplier under the logarithm, but we use a slightly more
+	 * complex formula that takes the number of unique tuples into account, and
+	 * it's not clear how to combine the multiplier with the number of groups.
+	 * Estimate it as 10 in cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * cost_sort_estimate
+ *		Simple wrapper around compute_cpu_sort_cost() to estimate the CPU
+ *		cost of sorting by the given pathkeys.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1771,7 +2087,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1790,9 +2106,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; if there is any "input cost",
+ * the caller should add it afterwards
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1809,9 +2127,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1835,12 +2150,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1856,18 +2169,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -1900,8 +2212,8 @@ cost_incremental_sort(Path *path,
 					  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 					  double limit_tuples)
 {
-	Cost		startup_cost = 0,
-				run_cost = 0,
+	Cost		startup_cost,
+				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
 				input_groups;
@@ -1986,7 +2298,7 @@ cost_incremental_sort(Path *path,
 	 * pessimistic about incremental sort performance and increase its average
 	 * group size by half.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   1.5 * group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -1994,7 +2306,7 @@ cost_incremental_sort(Path *path,
 	 * Startup cost of incremental sort is the startup cost of its first group
 	 * plus the cost of its input.
 	 */
-	startup_cost += group_startup_cost
+	startup_cost = group_startup_cost
 		+ input_startup_cost + group_input_run_cost;
 
 	/*
@@ -2003,7 +2315,7 @@ cost_incremental_sort(Path *path,
 	 * group, plus the total cost to process the remaining groups, plus the
 	 * remaining cost of input.
 	 */
-	run_cost += group_run_cost
+	run_cost = group_run_cost
 		+ (group_run_cost + group_startup_cost) * (input_groups - 1)
 		+ group_input_run_cost * (input_groups - 1);
 
@@ -2043,7 +2355,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2141,7 +2453,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2209,7 +2521,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8c6770de972..258302840f8 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -681,7 +681,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if the
+				 * EC was constructed from a WHERE clause, i.e. it doesn't have
+				 * a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 86a35cdef17..75fe03fd04b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,17 +17,22 @@
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -334,6 +339,517 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the rest
+	 * of the pathkeys are useless and can't be used to evaluate the grouping,
+	 * so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way that allows simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	*sgc;
+
+		/* abort on first mismatch */
+		if (!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+									  *group_clauses);
+
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState {
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			 mutatorNColumns;
+	int			 count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int i;
+	int			n = end - start;
+	ListCell	*lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void*) * n);
+	state->elemCells = palloc(sizeof(ListCell*) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+  int s = a[i];
+
+  a[i] = a[j];
+  a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int j, k, l, r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int	i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for(i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[ state->positions[i] - 1 ];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Reorders the group pathkeys/clauses to minimize the comparison cost.
+ *
+ * Given a list of pathkeys, we try to reorder them in a way that minimizes
+ * the CPU cost of sorting. This depends mainly on the cost of comparator
+ * function (the pathkeys may use different data types) and the number of
+ * distinct values in each column (which affects the number of comparator
+ * calls for the following pathkeys).
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.
+ *
+ * Returns true if the keys/clauses have been reordered (or might have been);
+ * the new lists are returned through the pointer arguments. The lists are new
+ * copies and may be freed using list_free.
+ *
+ * Returns false if no reordering was possible.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List		   *new_group_pathkeys = NIL,
+				   *new_group_clauses = NIL,
+				   *var_group_pathkeys;
+
+	ListCell	   *cell;
+	PathkeyMutatorState	mstate;
+	double			cheapest_sort_cost = -1.0;
+
+	int nFreeKeys;
+	int nToPermute;
+
+	/* If there are fewer than two unsorted pathkeys, we're done. */
+	if (list_length(*group_pathkeys) - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys that might be prohibitively expensive. So
+	 * we apply a simple cheap heuristic first - we sort the pathkeys by sort
+	 * cost (as if each pathkey was sorted independently) and then permute only
+	 * the four cheapest pathkeys. The remaining pathkeys are kept ordered by
+	 * cost.
+	 *
+	 * XXX This is a very simple heuristic, but likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than 4).
+	 * But it ignores how the number of distinct values in each pathkey affects
+	 * the following steps. It might be better to use a "more expensive" pathkey
+	 * first if it has many distinct values, because that limits the number of
+	 * comparisons for the remaining pathkeys. But evaluating that is likely
+	 * quite expensive.
+	 */
+	nFreeKeys = list_length(*group_pathkeys) - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		int i;
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+
+		/* skip the pre-ordered pathkeys */
+		cell = list_nth_cell(*group_pathkeys, n_preordered);
+
+		/* estimate cost for sorting individual pathkeys */
+		for (i = 0; cell != NULL; i++, (cell = lnext(*group_pathkeys, cell)))
+		{
+			List *to_cost = list_make1(lfirst(cell));
+
+			Assert(i < nFreeKeys);
+
+			costs[i].pathkey = lfirst(cell);
+			costs[i].cost = cost_sort_estimate(root, to_cost, 0, nrows);
+
+			pfree(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_truncate(list_copy(*group_pathkeys), n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		pfree(costs);
+	}
+	else
+	{
+		/* Copy the list, so that we can free the new list by list_free. */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly, we
+	 * may pass the current "best" cost to cost_sort_estimate so that it can
+	 * "abort" if the estimated pathkeys list exceeds it. Secondly, it could
+	 * return information about the position where it exceeded the cost, and
+	 * we could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better" so we can discard it.
+	 * But we also know that we can discard all ordering with the same prefix,
+	 * because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) then the
+	 * same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
+
+	while((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost	cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost || cheapest_sort_cost < 0)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey			*pathkey = (PathKey *) lfirst(cell);
+
+		new_group_clauses = lappend(new_group_clauses,
+						get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+												*group_clauses));
+	}
+
+	/* Append the remaining GROUP BY clauses. */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys,
+								List *group_pathkeys, List *group_clauses)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List *pathkeys = group_pathkeys;
+	List *clauses = group_clauses;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If not,
+	 * we produce only the order specified in the query, i.e. the optimization
+	 * is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to match
+	 * as much of the ordering as possible - we get this sort for free (mostly).
+	 *
+	 * We must not do this for grouping sets, which use more complex logic to
+	 * decide the ordering - but that case already returned early above.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * Reorder the tail to minimize the sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to match path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, this time matching
+	 * the target ORDER BY clause as much as possible.
+	 */
+	if (root->sort_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1862,6 +2378,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the path's ordering. The
+ * ordering may not be "complete" and may require an incremental sort, but
+ * that's fine. So we simply count the prefix of pathkeys with a matching
+ * group key, and stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines that
+ * the (a,b) pathkeys are useful for grouping, and we might do an incremental
+ * sort to get a path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *key;
+	int		  n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey	*pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1876,6 +2440,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1911,6 +2478,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
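As an aside for reviewers, the prefix-matching behavior implemented by pathkeys_useful_for_grouping and group_keys_reorder_by_pathkeys can be sketched in a few lines of Python. This is a toy model for illustration only: list membership stands in for pathkey equality, and the cost-based reordering of the tail is left out entirely.

```python
def reorder_by(group_keys, target):
    """Model of group_keys_reorder_by_pathkeys: pull group keys matching
    a prefix of `target` to the front, keep the rest in original order."""
    prefix = []
    for key in target:
        if key not in group_keys:
            break  # only a contiguous prefix counts
        prefix.append(key)
    return prefix + [k for k in group_keys if k not in prefix]

def useful_orderings(group_keys, path_keys=(), order_by_keys=()):
    """Model of get_useful_group_keys_orderings: collect distinct candidate
    orderings (original, matched to the input path's ordering, matched to
    ORDER BY); cost-based tail reordering is omitted from this sketch."""
    candidates = [list(group_keys)]
    for target in (path_keys, order_by_keys):
        if target:
            cand = reorder_by(group_keys, target)
            if cand not in candidates:
                candidates.append(cand)
    return candidates

# GROUP BY (b, a) over a path sorted on (a): reordering to (a, b) lets the
# sort be skipped or replaced by an incremental sort.
print(useful_orderings(["b", "a"], path_keys=["a"]))  # -> [['b', 'a'], ['a', 'b']]
```

The same prefix loop, minus the reordering, is what pathkeys_useful_for_grouping counts when truncating useless pathkeys.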
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 547fda20a23..b2569c5d0c7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6218,24 +6218,121 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
 
-			if (path == cheapest_path || is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest-total path if it isn't already sorted */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_path || is_sorted)
+				{
+					/* Sort the cheapest-total path if it isn't already sorted */
+					if (!is_sorted)
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+
+					/* Now decide what to stick atop it */
+					if (parse->groupingSets)
+					{
+						consider_groupingsets_paths(root, grouped_rel,
+													path, true, can_hash,
+													gd, agg_costs, dNumGroups);
+					}
+					else if (parse->hasAggs)
+					{
+						/*
+						 * We have aggregation, possibly with plain GROUP BY. Make
+						 * an AggPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_SIMPLE,
+												 info->clauses,
+												 havingQual,
+												 agg_costs,
+												 dNumGroups));
+					}
+					else if (group_clauses)
+					{
+						/*
+						 * We have GROUP BY without aggregation or grouping sets.
+						 * Make a GroupPath.
+						 */
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+					}
+					else
+					{
+						/* Other cases should have been handled above */
+						Assert(false);
+					}
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
@@ -6247,17 +6344,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->hasAggs)
 				{
 					/*
-					 * We have aggregation, possibly with plain GROUP BY. Make
-					 * an AggPath.
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 info->clauses ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_SIMPLE,
-											 parse->groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_costs,
 											 dNumGroups));
@@ -6265,14 +6362,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				else if (parse->groupClause)
 				{
 					/*
-					 * We have GROUP BY without aggregation or grouping sets.
-					 * Make a GroupPath.
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
 					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 				}
@@ -6282,79 +6379,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 					Assert(false);
 				}
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, no point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   parse->groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
 		}
 
 		/*
@@ -6365,100 +6389,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
 				Path	   *path_original = path;
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				List	   *pathkey_orderings = NIL;
 
-				/*
-				 * Insert a Sort node, if required.  But there's no point in
-				 * sorting anything but the cheapest path.
-				 */
-				if (!is_sorted)
+				List	   *group_pathkeys = root->group_pathkeys;
+				List	   *group_clauses = parse->groupClause;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys,
+																	group_pathkeys,
+																	group_clauses);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach (lc2, pathkey_orderings)
 				{
-					if (path != partially_grouped_rel->cheapest_total_path)
-						continue;
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				}
+					bool		is_sorted;
+					int			presorted_keys = 0;
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* restore the path (we replace it in the loop) */
+					path = path_original;
 
-				/*
-				 * Now we may consider incremental sort on this path, but only
-				 * when the path is not already sorted and when incremental
-				 * sort is enabled.
-				 */
-				if (is_sorted || !enable_incremental_sort)
-					continue;
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				/* Restore the input path (we might have added Sort on top). */
-				path = path_original;
+					/*
+					 * Insert a Sort node, if required.  But there's no point in
+					 * sorting anything but the cheapest path.
+					 */
+					if (!is_sorted)
+					{
+						if (path != partially_grouped_rel->cheapest_total_path)
+							continue;
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
 
-				/* no shared prefix, not point in building incremental sort */
-				if (presorted_keys == 0)
-					continue;
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
 
-				/*
-				 * We should have already excluded pathkeys of length 1
-				 * because then presorted_keys > 0 would imply is_sorted was
-				 * true.
-				 */
-				Assert(list_length(root->group_pathkeys) != 1);
+					/*
+					 * Now we may consider incremental sort on this path, but only
+					 * when the path is not already sorted and when incremental
+					 * sort is enabled.
+					 */
+					if (is_sorted || !enable_incremental_sort)
+						continue;
 
-				path = (Path *) create_incremental_sort_path(root,
-															 grouped_rel,
-															 path,
-															 root->group_pathkeys,
-															 presorted_keys,
-															 -1.0);
+					/* Restore the input path (we might have added Sort on top). */
+					path = path_original;
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
-							 create_agg_path(root,
-											 grouped_rel,
-											 path,
-											 grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
-											 havingQual,
-											 agg_final_costs,
-											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
-							 create_group_path(root,
-											   grouped_rel,
-											   path,
-											   parse->groupClause,
-											   havingQual,
-											   dNumGroups));
+					/* no shared prefix, no point in building incremental sort */
+					if (presorted_keys == 0)
+						continue;
+
+					/*
+					 * We should have already excluded pathkeys of length 1
+					 * because then presorted_keys > 0 would imply is_sorted was
+					 * true.
+					 */
+					Assert(list_length(root->group_pathkeys) != 1);
+
+					path = (Path *) create_incremental_sort_path(root,
+																 grouped_rel,
+																 path,
+																 info->pathkeys,
+																 presorted_keys,
+																 -1.0);
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+				}
 			}
 		}
 	}
@@ -6661,41 +6709,71 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
+			Path	   *path_save = path;
 
-			is_sorted = pathkeys_contained_in(root->group_pathkeys,
-											  path->pathkeys);
-			if (path == cheapest_total_path || is_sorted)
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				if (parse->hasAggs)
-					add_path(partially_grouped_rel, (Path *)
-							 create_agg_path(root,
-											 partially_grouped_rel,
-											 path,
-											 partially_grouped_rel->reltarget,
-											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_INITIAL_SERIAL,
-											 parse->groupClause,
-											 NIL,
-											 agg_partial_costs,
-											 dNumPartialGroups));
-				else
-					add_path(partially_grouped_rel, (Path *)
-							 create_group_path(root,
-											   partially_grouped_rel,
-											   path,
-											   parse->groupClause,
-											   NIL,
-											   dNumPartialGroups));
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_total_path || is_sorted)
+				{
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 info->clauses ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 info->clauses,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialGroups));
+					else
+						add_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   info->clauses,
+												   NIL,
+												   dNumPartialGroups));
+				}
 			}
 		}
 
@@ -6705,6 +6783,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 * We can also skip the entire loop when we only have a single-item
 		 * group_pathkeys because then we can't possibly have a presorted
 		 * prefix of the list without having the list be fully sorted.
+		 *
+		 * XXX Shouldn't this also consider the group-key-reordering?
 		 */
 		if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
 		{
@@ -6763,24 +6843,100 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
 			Path	   *path_original = path;
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			List	   *pathkey_orderings = NIL;
+
+			List	   *group_pathkeys = root->group_pathkeys;
+			List	   *group_clauses = parse->groupClause;
 
-			if (path == cheapest_partial_path || is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys,
+																group_pathkeys,
+																group_clauses);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach (lc2, pathkey_orderings)
 			{
-				/* Sort the cheapest partial path, if it isn't already */
-				if (!is_sorted)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
+				bool		is_sorted;
+				int			presorted_keys = 0;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_original;
+
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (path == cheapest_partial_path || is_sorted)
+				{
+
+					/* Sort the cheapest partial path, if it isn't already */
+					if (!is_sorted)
+					{
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_agg_path(root,
+														 partially_grouped_rel,
+														 path,
+														 partially_grouped_rel->reltarget,
+														 info->clauses ? AGG_SORTED : AGG_PLAIN,
+														 AGGSPLIT_INITIAL_SERIAL,
+														 info->clauses,
+														 NIL,
+														 agg_partial_costs,
+														 dNumPartialPartialGroups));
+					else
+						add_partial_path(partially_grouped_rel, (Path *)
+										 create_group_path(root,
+														   partially_grouped_rel,
+														   path,
+														   info->clauses,
+														   NIL,
+														   dNumPartialPartialGroups));
+				}
+
+				/*
+				 * Now we may consider incremental sort on this path, but only
+				 * when the path is not already sorted and when incremental sort
+				 * is enabled.
+				 */
+				if (is_sorted || !enable_incremental_sort)
+					continue;
+
+				/* Restore the input path (we might have added Sort on top). */
+				path = path_original;
+
+				/* no shared prefix, no point in building incremental sort */
+				if (presorted_keys == 0)
+					continue;
+
+				/*
+				 * We should have already excluded pathkeys of length 1 because
+				 * then presorted_keys > 0 would imply is_sorted was true.
+				 */
+				Assert(list_length(root->group_pathkeys) != 1);
+
+				path = (Path *) create_incremental_sort_path(root,
+															 partially_grouped_rel,
+															 path,
+															 info->pathkeys,
+															 presorted_keys,
+															 -1.0);
 
 				if (parse->hasAggs)
 					add_partial_path(partially_grouped_rel, (Path *)
@@ -6788,9 +6944,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 													 partially_grouped_rel,
 													 path,
 													 partially_grouped_rel->reltarget,
-													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 info->clauses ? AGG_SORTED : AGG_PLAIN,
 													 AGGSPLIT_INITIAL_SERIAL,
-													 parse->groupClause,
+													 info->clauses,
 													 NIL,
 													 agg_partial_costs,
 													 dNumPartialPartialGroups));
@@ -6799,59 +6955,10 @@ create_partial_grouping_paths(PlannerInfo *root,
 									 create_group_path(root,
 													   partially_grouped_rel,
 													   path,
-													   parse->groupClause,
+													   info->clauses,
 													   NIL,
 													   dNumPartialPartialGroups));
 			}
-
-			/*
-			 * Now we may consider incremental sort on this path, but only
-			 * when the path is not already sorted and when incremental sort
-			 * is enabled.
-			 */
-			if (is_sorted || !enable_incremental_sort)
-				continue;
-
-			/* Restore the input path (we might have added Sort on top). */
-			path = path_original;
-
-			/* no shared prefix, not point in building incremental sort */
-			if (presorted_keys == 0)
-				continue;
-
-			/*
-			 * We should have already excluded pathkeys of length 1 because
-			 * then presorted_keys > 0 would imply is_sorted was true.
-			 */
-			Assert(list_length(root->group_pathkeys) != 1);
-
-			path = (Path *) create_incremental_sort_path(root,
-														 partially_grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 presorted_keys,
-														 -1.0);
-
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   parse->groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
 		}
 	}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 99df76b6b71..1670e546440 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fbb0b28c3b..fb4fb987e7f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3368,11 +3368,28 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ *		A variant of estimate_num_groups, optimized for callers that add the
+ *		expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows,
+					List **pgset, EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3403,7 +3420,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3412,6 +3429,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* already processed by a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++;			/* keep i in sync with the check below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3481,7 +3506,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3498,6 +3527,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
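The caching contract of estimate_num_groups_incremental - fold each expression's statistics into a carried-over list once, then skip the first prevNExprs expressions on later calls - can be illustrated with a small Python model. All names here are hypothetical, and a plain per-column distinct count stands in for the real varinfo machinery and its clamping.

```python
def estimate_groups_incremental(exprs, ndistinct, cache, prev_n):
    """Estimate the number of groups for `exprs`, reusing `cache`
    (per-expression distinct counts computed by an earlier call for the
    first `prev_n` expressions)."""
    for expr in exprs[prev_n:]:     # only the newly added tail is examined
        cache.append(float(ndistinct[expr]))
    estimate = 1.0
    for d in cache:                 # naive product; no cross-column clamping
        estimate *= d
    return estimate, cache

stats = {"a": 10, "b": 3, "c": 7}
est, cache = estimate_groups_incremental(["a"], stats, [], 0)
est, cache = estimate_groups_incremental(["a", "b"], stats, cache, 1)
print(est)  # -> 30.0
```

The point of the refactoring is visible here: calling the estimator once per added GROUP BY column costs O(1) extra work per call instead of re-deriving the statistics for the whole prefix each time.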
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index eb3a03b9762..9e8ab1420d9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1182,6 +1182,16 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b933fade8c6..93d221a37b1 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -382,6 +382,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index e8f30367a48..e74a7c72ea6 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -271,6 +271,7 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_PathKeyInfo,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 365000bdcdb..6cbcb67bdf1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1070,6 +1070,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index bc12071af6e..dc7fc174114 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -114,7 +114,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *path);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 0c3a0b90c85..2ffa24aa899 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -203,6 +204,12 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+										  List **group_pathkeys,
+										  List **group_clauses);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys,
+											 List *pathkeys, List *clauses);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 8f3d73edfb2..c313a08d541 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+					double input_rows, List **pgset,
+					EstimationInfo *estinfo,
+					List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0a23a39aa29..601047fa3dd 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1210,7 +1210,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1233,10 +1234,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, c, v
+   ->  Sort
+         Sort Key: p, d, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, d, v
+   ->  Incremental Sort
+         Sort Key: p, c, d, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 545e301e482..21c429226f7 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 19caebabd01..bf1a2db2cf0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
@@ -6336,44 +6338,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b52..a08a3825ff6 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 --------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
-         ->  Parallel Append
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml.a
-                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml.a
-                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_5.a
-                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_5.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-               ->  GroupAggregate
-                     Group Key: pagg_tab_ml_2.a
-                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-                     ->  Sort
-                           Sort Key: pagg_tab_ml_2.a
-                           ->  Append
-                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(27 rows)
+   ->  Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(25 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                                QUERY PLAN                                 
----------------------------------------------------------------------------
- Gather
-   Workers Planned: 2
-   ->  Parallel Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-(25 rows)
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Append
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml.a
+         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml.a
+               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_2.a
+         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_2.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+   ->  GroupAggregate
+         Group Key: pagg_tab_ml_5.a
+         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+         ->  Sort
+               Sort Key: pagg_tab_ml_5.a
+               ->  Append
+                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+(23 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index bb5b7c47a45..03926a84138 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.b, prt1_3.a
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.b, p2_3.a
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 579b861d84f..4e775af1758 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -131,7 +132,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(20 rows)
+(21 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index dece7310cfe..7ac4a9380e2 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 2f5d0e00f3d..c6e0d7ba2b7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index d8768a6b54d..1de163e0395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.34.1

#71Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Tomas Vondra (#70)
Re: POC: GROUP BY optimization

Pushed, after going through the patch once more, running check-world
under valgrind, and updating the commit message.

Thanks to everyone who reviewed/tested this patch!

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#72David Rowley
dgrowleyml@gmail.com
In reply to: Tomas Vondra (#71)
Re: POC: GROUP BY optimization

On Thu, 31 Mar 2022 at 12:19, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:

Pushed, after going through the patch once more, running check-world
under valgrind, and updating the commit message.

I'm just in this general area of the code again today and wondered
about the header comment for the preprocess_groupclause() function.

It says:

* In principle it might be interesting to consider other orderings of the
* GROUP BY elements, which could match the sort ordering of other
* possible plans (eg an indexscan) and thereby reduce cost. We don't
* bother with that, though. Hashed grouping will frequently win anyway.

I'd say this commit makes that paragraph mostly obsolete. It's only
true now in the sense that we don't try orders that suit some index
that would provide pre-sorted results for a GroupAggregate path. The
comment leads me to believe that we don't do anything at all to find a
better order, and that's not true now.

David

#73David Rowley
dgrowleyml@gmail.com
In reply to: Tomas Vondra (#71)
Re: POC: GROUP BY optimization

On Thu, 31 Mar 2022 at 12:19, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:

Pushed, after going through the patch once more, running check-world
under valgrind, and updating the commit message.

I'm still working in this area and I noticed that db0d67db2 updated
some regression tests in partition_aggregate.out without any care as
to what the test was testing.

The comment above the test reads:

-- Without ORDER BY clause, to test Gather at top-most path

and you've changed the expected plan from being a parallel plan with a
Gather to being a serial plan. So it looks like the test might have
become useless.

I see that the original plan appears to come back with some
adjustments to parallel_setup_cost and parallel_tuple_cost. It seems a
bit strange to me that the changes with this patch would cause a
change of plan for this. There is only 1 GROUP BY column in the query
in question. There's no rearrangement to do with a single column GROUP
BY.

David

#74Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: David Rowley (#73)
Re: POC: GROUP BY optimization

On 7/15/22 07:18, David Rowley wrote:

On Thu, 31 Mar 2022 at 12:19, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:

Pushed, after going through the patch once more, running check-world
under valgrind, and updating the commit message.

I'm still working in this area and I noticed that db0d67db2 updated
some regression tests in partition_aggregate.out without any care as
to what the test was testing.

The comment above the test reads:

-- Without ORDER BY clause, to test Gather at top-most path

and you've changed the expected plan from being a parallel plan with a
Gather to being a serial plan. So it looks like the test might have
become useless.

I agree this is a mistake in db0d67db2 that makes the test useless.

I see that the original plan appears to come back with some
adjustments to parallel_setup_cost and parallel_tuple_cost. It seems a
bit strange to me that the changes with this patch would cause a
change of plan for this. There is only 1 GROUP BY column in the query
in question. There's no rearrangement to do with a single column GROUP
BY.

It might seem a bit strange, but the patch tweaked the costing a bit, so
it's not entirely unexpected. I'd bet the plan cost changed just a teeny
bit, but enough to change the cheapest plan. The costing changed for all
group counts, including a single group.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#75Michael Paquier
michael@paquier.xyz
In reply to: Tomas Vondra (#74)
Re: POC: GROUP BY optimization

On Fri, Jul 15, 2022 at 09:46:51PM +0200, Tomas Vondra wrote:

I agree this is a mistake in db0d67db2 that makes the test useless.

Tomas, please note that this is an open item assigned to you. Are you
planning to do something with these regression tests by beta3?
--
Michael

#76David Rowley
dgrowleyml@gmail.com
In reply to: Michael Paquier (#75)
Re: POC: GROUP BY optimization

On Tue, 2 Aug 2022 at 22:21, Michael Paquier <michael@paquier.xyz> wrote:

On Fri, Jul 15, 2022 at 09:46:51PM +0200, Tomas Vondra wrote:

I agree this is a mistake in db0d67db2 that makes the test useless.

Tomas, please note that this is an open item assigned to you. Are you
planning to do something with these regression tests by beta3?

There's still something to do there for PG15, but 1349d2790 (just gone
into master) made those two tests run as a parallel query again.

David

#77Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: David Rowley (#76)
2 attachment(s)
Re: POC: GROUP BY optimization

On 8/2/22 13:14, David Rowley wrote:

On Tue, 2 Aug 2022 at 22:21, Michael Paquier <michael@paquier.xyz> wrote:

On Fri, Jul 15, 2022 at 09:46:51PM +0200, Tomas Vondra wrote:

I agree this is a mistake in db0d67db2 that makes the test useless.

Tomas, please note that this is an open item assigned to you. Are you
planning to do something with these regression tests by beta3?

There's still something to do there for PG15, but 1349d2790 (just gone
in to master) made those two tests run as a parallel query again.

Hi,

I've been looking at this, and as I speculated before, it's due to the
sort costing changing a little bit. On PG14, the costing of the plans
looks like this:

Gather (cost=1869.39..2823.15 rows=8 width=52)

and with disabled parallelism, it's like this:

Append (cost=998.04..3000.64 rows=10 width=52)

so there's a (fairly small) difference in favor of the parallel plan. But
on PG15 it's the other way around - the selected non-parallel one is
costed like this:

Append (cost=779.41..2490.61 rows=10 width=52)

and by setting parallel_setup_cost=0 you get this:

Gather (cost=700.34..1531.76 rows=8 width=52)

So with the setup cost included it's ~2531, and it loses to the simple
plan. This happens because the patch changed sort costing - the same
sort on PG14 and PG15 looks like this:

PG14: -> Sort (cost=998.04..1028.04 rows=12000 width=13)

PG15: -> Sort (cost=779.41..809.41 rows=12000 width=13)
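To spell out the arithmetic behind the plan flip, here is a back-of-envelope sketch; the figures are the EXPLAIN totals quoted above, and 1000 is simply the default value of parallel_setup_cost:

```python
# Totals taken from the EXPLAIN output quoted in this message.
gather_total_no_setup = 1531.76   # Gather on PG15 with parallel_setup_cost = 0
append_total_pg15 = 2490.61       # serial Append on PG15
parallel_setup_cost = 1000.0      # default GUC value

# Adding the setup cost back in, the parallel plan lands at ~2531...
gather_total = gather_total_no_setup + parallel_setup_cost

# ...which is why the cheaper serial Append wins on PG15:
assert append_total_pg15 < gather_total

# On PG14 the comparison went the other way (Gather 2823.15 vs.
# Append 3000.64), because the old sort costing made the serial
# plan more expensive:
assert 2823.15 < 3000.64
```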

As mentioned, the commit tweaked sort costing - before it was pretty
much just

comparison_cost * tuples * LOG2(tuples)

but the patch needs to cost different pathkey orderings, and consider
that maybe we don't need to compare all the keys (which the original
costing kind of assumes). That's the whole point of this optimization.

The costing (compute_cpu_sort_cost) considers a couple other things,
like cost of the comparator function for the data type, width of the
values, groupings determined by preceding keys, and so on.
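The intuition can be sketched roughly like this (a simplified model in Python, not the actual compute_cpu_sort_cost code; the 1/ndistinct tie fraction and the cost constant are illustrative assumptions):

```python
import math

def simple_sort_cost(tuples, comparison_cost=0.0025):
    # The pre-15 formula quoted above: one flat per-tuple comparison
    # charge, regardless of how many keys are actually compared.
    return comparison_cost * tuples * math.log2(tuples)

def per_key_sort_cost(tuples, ndistinct_per_key, comparison_cost=0.0025):
    # Sketch of the new idea: a later key is only compared when all
    # earlier keys tie, i.e. roughly a 1/ndistinct fraction of the time.
    per_comparison = 0.0
    tie_fraction = 1.0
    for nd in ndistinct_per_key:
        per_comparison += tie_fraction * comparison_cost
        tie_fraction /= max(nd, 1.0)
    return per_comparison * tuples * math.log2(tuples)

rows = 12000
# Leading with the high-cardinality key makes later comparisons rare,
# so this key order sorts more cheaply than the reverse:
cheap = per_key_sort_cost(rows, [5000, 2])
expensive = per_key_sort_cost(rows, [2, 5000])
assert cheap < expensive
```

This is the same effect described at the start of the thread: if the first column is (nearly) unique, the comparator almost never has to look at the following columns.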

It might seem strange that the plan changes even for a query with a
single pathkey, but that single pathkey goes through the same costing,
of course. In principle we might have special costing for this case, but
I'd bet that would result in pretty surprising inconsistencies when
adding a sort key (going from 1 to 2 keys).

So I don't think the current costing is wrong, but it certainly is more
complex. But the test does not test what it intended - I have three
ideas for how to make it work:

1) increase the number of rows in the table

2) increase cpu_operator_cost (for that one test?)

3) tweak the costing somehow, to increase the cost a bit

Both (1) and (2) work - I've tried doubling the number of rows or
setting the operator cost to 0.005, and that does the trick, but maybe a
smaller change would be enough.

I don't like (3) very much - changing the costing just to get the same
test behavior as on an older release does not seem very principled. Yes,
maybe it should be tweaked, but not because of a regression test.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

plans-15.log (text/x-log)
plans-14.log (text/x-log)
#78David Rowley
dgrowleyml@gmail.com
In reply to: Tomas Vondra (#77)
2 attachment(s)
Re: POC: GROUP BY optimization

On Thu, 18 Aug 2022 at 02:46, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:

So I don't think the current costing is wrong, but it certainly is more
complex. But the test does not test what it intended - I have three
ideas for how to make it work:

1) increase the number of rows in the table

2) increase cpu_operator_cost (for that one test?)

3) tweak the costing somehow, to increase the cost a bit

Why not, 4) SET parallel_setup_cost = 0; there are plenty of other
places we do just that so we get a parallel plan without having to
generate enough cost to drown out the parallel worker startup cost.

Here are a couple of patches to demo the idea.

David

Attachments:

fix_partitionwise_aggregate_test_master.patch (text/plain)
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 4b41ccf1aa..db36e3a150 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -942,6 +942,7 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM gen
 ANALYZE pagg_tab_ml;
 -- For Parallel Append
 SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, but still we do not see a partial aggregation as array_agg()
@@ -1025,6 +1026,7 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
                            ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
 (25 rows)
 
+RESET parallel_setup_cost;
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql
index c17294b15b..ab070fee24 100644
--- a/src/test/regress/sql/partition_aggregate.sql
+++ b/src/test/regress/sql/partition_aggregate.sql
@@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml;
 
 -- For Parallel Append
 SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
 
+RESET parallel_setup_cost;
+
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, thus we will have a partial aggregation for them.
fix_partitionwise_aggregate_test_pg15.patch (text/plain)
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a08a3825ff..0dc6d63347 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -942,40 +942,43 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM gen
 ANALYZE pagg_tab_ml;
 -- For Parallel Append
 SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, but still we do not see a partial aggregation as array_agg()
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+         ->  Parallel Append
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml.a
+                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml.a
+                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_5.a
+                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_5.a
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_2.a
+                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_2.a
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(27 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -994,33 +997,36 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- Append
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml.a
-         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml.a
-               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_2.a
-         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_2.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_5.a
-         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_5.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)
 
+RESET parallel_setup_cost;
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql
index c17294b15b..ab070fee24 100644
--- a/src/test/regress/sql/partition_aggregate.sql
+++ b/src/test/regress/sql/partition_aggregate.sql
@@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml;
 
 -- For Parallel Append
 SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
 
+RESET parallel_setup_cost;
+
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
 -- PARTITION KEY, thus we will have a partial aggregation for them.
#79Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: David Rowley (#78)
Re: POC: GROUP BY optimization

On 8/18/22 03:32, David Rowley wrote:

On Thu, 18 Aug 2022 at 02:46, Tomas Vondra
<tomas.vondra@enterprisedb.com> wrote:

So I don't think the current costing is wrong, but it certainly is more
complex. But the test does not test what it intended - I have three
ideas for how to make it work:

1) increase the number of rows in the table

2) increase cpu_operator_cost (for that one test?)

3) tweak the costing somehow, to increase the cost a bit

Why not, 4) SET parallel_setup_cost = 0; there are plenty of other
places we do just that so we get a parallel plan without having to
generate enough cost to drown out the parallel worker startup cost.

Here are a couple of patches to demo the idea.

Yeah, that's an option too. I should have mentioned it along with the
cpu_operator_cost.

BTW would you mind taking a look at the costing? I think it's fine, but
it would be good if someone not involved in the patch takes a look.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#80Jonathan S. Katz
jkatz@postgresql.org
In reply to: Tomas Vondra (#79)
Re: POC: GROUP BY optimization

On 8/18/22 3:29 AM, Tomas Vondra wrote:

On 8/18/22 03:32, David Rowley wrote:

Here are a couple of patches to demo the idea.

Yeah, that's an option too. I should have mentioned it along with the
cpu_operator_cost.

BTW would you mind taking a look at the costing? I think it's fine, but
it would be good if someone not involved in the patch takes a look.

With RMT hat on, just a friendly reminder that this is still on the open
items list[1] -- we have the Beta 4 release next week and it would be
great if we can get this resolved prior to it.

Thanks!

Jonathan

[1]: https://wiki.postgresql.org/wiki/PostgreSQL_15_Open_Items
[2]: /messages/by-id/9d251aec-cea2-bc1a-5ed8-46ef0bcf6c69@postgresql.org

#81Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Jonathan S. Katz (#80)
Re: POC: GROUP BY optimization

On 8/30/22 15:15, Jonathan S. Katz wrote:

On 8/18/22 3:29 AM, Tomas Vondra wrote:

On 8/18/22 03:32, David Rowley wrote:

Here are a couple of patches to demo the idea.

Yeah, that's an option too. I should have mentioned it along with the
cpu_operator_cost.

BTW would you mind taking a look at the costing? I think it's fine, but
it would be good if someone not involved in the patch takes a look.

With RMT hat on, just a friendly reminder that this is still on the open
items list[1] -- we have the Beta 4 release next week and it would be
great if we can get this resolved prior to it.

I know. I'll fix this by tweaking one of the cost gucs, mentioned by David.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#82Jonathan S. Katz
jkatz@postgresql.org
In reply to: Tomas Vondra (#81)
Re: POC: GROUP BY optimization

On 9/1/22 9:06 AM, Tomas Vondra wrote:

On 8/30/22 15:15, Jonathan S. Katz wrote:

On 8/18/22 3:29 AM, Tomas Vondra wrote:

On 8/18/22 03:32, David Rowley wrote:

Here are a couple of patches to demo the idea.

Yeah, that's an option too. I should have mentioned it along with the
cpu_operator_cost.

BTW would you mind taking a look at the costing? I think it's fine, but
it would be good if someone not involved in the patch takes a look.

With RMT hat on, just a friendly reminder that this is still on the open
items list[1] -- we have the Beta 4 release next week and it would be
great if we can get this resolved prior to it.

I know. I'll fix this by tweaking one of the cost gucs, mentioned by David.

Great -- thanks for the update!

Jonathan

#83Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Jonathan S. Katz (#82)
Re: POC: GROUP BY optimization

On 9/1/22 16:05, Jonathan S. Katz wrote:

On 9/1/22 9:06 AM, Tomas Vondra wrote:

On 8/30/22 15:15, Jonathan S. Katz wrote:

On 8/18/22 3:29 AM, Tomas Vondra wrote:

On 8/18/22 03:32, David Rowley wrote:

Here are a couple of patches to demo the idea.

Yeah, that's an option too. I should have mentioned it along with the
cpu_operator_cost.

BTW would you mind taking a look at the costing? I think it's fine, but
it would be good if someone not involved in the patch takes a look.

With RMT hat on, just a friendly reminder that this is still on the open
items list[1] -- we have the Beta 4 release next week and it would be
great if we can get this resolved prior to it.

I know. I'll fix this by tweaking one of the cost gucs, mentioned by
David.

Great -- thanks for the update!

I've pushed the fix to 15+master. In the end I just used David's patches
that set parallel_setup_cost to 0.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#84Jonathan S. Katz
jkatz@postgresql.org
In reply to: Tomas Vondra (#83)
Re: POC: GROUP BY optimization

On 9/4/22 6:14 PM, Tomas Vondra wrote:

I've pushed the fix to 15+master. In the end I just used David's patches
that set parallel_setup_cost to 0.

Thanks! I have closed the open item.

Jonathan

#85Michael Paquier
michael@paquier.xyz
In reply to: Tomas Vondra (#83)
Re: POC: GROUP BY optimization

On Mon, Sep 05, 2022 at 12:14:48AM +0200, Tomas Vondra wrote:

I've pushed the fix to 15+master. In the end I just used David's patches
that set parallel_setup_cost to 0.

Thanks, Tomas!
--
Michael

#86David Rowley
dgrowleyml@gmail.com
In reply to: David Rowley (#72)
Re: POC: GROUP BY optimization

On Wed, 13 Jul 2022 at 15:37, David Rowley <dgrowleyml@gmail.com> wrote:

I'm just in this general area of the code again today and wondered
about the header comment for the preprocess_groupclause() function.

It says:

* In principle it might be interesting to consider other orderings of the
* GROUP BY elements, which could match the sort ordering of other
* possible plans (eg an indexscan) and thereby reduce cost. We don't
* bother with that, though. Hashed grouping will frequently win anyway.

I'd say this commit makes that paragraph mostly obsolete. It's only
true now in the sense that we don't try orders that suit some index
that would provide pre-sorted results for a GroupAggregate path. The
comment leads me to believe that we don't do anything at all to find a
better order, and that's not true now.

I've just pushed a fix for this.

David

#87Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: David Rowley (#86)
1 attachment(s)
Re: POC: GROUP BY optimization

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now. We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some recent
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, let's repeat the review and rewrite the current patch according to
the reasons given in the revert commit.

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

0001-When-evaluating-a-query-with-a-multi-column-GROUP-BY-20230720.patch (text/plain)
From 913d55ee887dccfeba360f5f44ed347dd1ba9044 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Fri, 14 Jul 2023 10:29:36 +0700
Subject: [PATCH] When evaluating a query with a multi-column GROUP BY clause
 using sort, the cost may be heavily dependent on the order in which the keys
 are compared when building the groups. Grouping does not imply any ordering,
 so we're allowed to compare the keys in arbitrary order, and a Hash Agg
 leverages this. But for Group Agg, we simply compared keys in the order as
 specified in the query. This commit explores alternative ordering of the
 keys, trying to find a cheaper one.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In principle, we might generate grouping paths for all permutations of
the keys, and leave the rest to the optimizer. But that might get very
expensive, so we try to pick only a couple of interesting orderings based
on both local and global information.

When planning the grouping path, we explore statistics (number of
distinct values, cost of the comparison function) for the keys and
reorder them to minimize comparison costs. Intuitively, it may be better
to perform more expensive comparisons (for complex data types etc.)
last, because maybe the cheaper comparisons will be enough. Similarly,
the higher the cardinality of a key, the lower the probability we’ll
need to compare more keys. The patch generates and costs various
orderings, picking the cheapest ones.

The ordering of group keys may interact with other parts of the query,
some of which may not be known while planning the grouping. E.g. there
may be an explicit ORDER BY clause, or some other ordering-dependent
operation, higher up in the query, and using the same ordering may allow
using either incremental sort or even eliminate the sort entirely.

The patch generates orderings and picks those minimizing the comparison
cost (for various pathkeys), and then adds orderings that might be
useful for operations higher up in the plan (ORDER BY, etc.). Finally,
it always keeps the ordering specified in the query, on the assumption
the user might have additional insights.

This introduces a new GUC enable_group_by_reordering, so that the
optimization may be disabled if needed.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  15 +-
 src/backend/optimizer/path/costsize.c         | 363 ++++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 602 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 467 ++++++++------
 src/backend/optimizer/util/pathnode.c         |   4 +-
 src/backend/utils/adt/selfuncs.c              |  38 +-
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |   7 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 src/test/regress/expected/merge.out           |  15 +-
 .../regress/expected/partition_aggregate.out  |  44 +-
 src/test/regress/expected/partition_join.out  |  75 +--
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 23 files changed, 1760 insertions(+), 374 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 852b5b4707..3f3c181ac8 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -9593,13 +9593,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J
 -- left outer join + nullable clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
-                                                                                                                     QUERY PLAN                                                                                                                     
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                    QUERY PLAN                                                                                     
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
    Output: t1.a, fprt2.b, fprt2.c
-   Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
-   Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
-(4 rows)
+   Sort Key: t1.a, fprt2.b, fprt2.c
+   ->  Foreign Scan
+         Output: t1.a, fprt2.b, fprt2.c
+         Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
+         Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))
+(7 rows)
 
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
  a | b |  c   
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ef475d95a1..1e979d57f2 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1816,6 +1816,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, which would make the estimate_num_groups() call fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative cost of comparing two values based on their width.
+ * The idea is that the comparison becomes more expensive the wider the
+ * values are. The return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double		width = -1.0;	/* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		   *var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int			ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, try using type width instead. */
+	if (width < 0.0)
+	{
+		Node	   *node = (Node *) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Values are passed as Datum type, so comparisons can't be cheaper than
+	 * comparing a Datum value.
+	 *
+	 * FIXME I find this reasoning questionable. We may pass int2, and
+	 * comparing it is probably a bit cheaper than comparing a bigint.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * We consider the cost of a comparison not to be directly proportional to
+	 * width of the argument, because widths of the arguments could be
+	 * slightly different (we only know the average width for the whole
+	 * column). So we use log16(width) as an estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same, because we have no estimate of the group
+ * sizes - we only know the estimated number of groups (distinct values). In
+ * that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparison cost for column k; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, it will fall back to
+ * a simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell   *lc;
+	List	   *pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	List	   *cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the
+		 * constant factor is a bit higher than for quicksort. Tweak it so
+		 * that the cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account the per-column
+	 * comparison function cost.  We try to compute the needed number of
+	 * comparisons per column.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceMember *em;
+		double		nGroups,
+					correctedNGroups;
+		Cost		funcCost = 1.0;
+
+		/*
+		 * We believe that equivalence members aren't very different, so, to
+		 * estimate cost we consider just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if the data type is the same */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+				/*
+				 * add_function_cost returns the product of cpu_operator_cost
+				 * and procost, but we need just procost, so undo that.
+				 */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+
+				prev_datatype = em->em_datatype;
+			}
+		}
+
+		/* factor in the width of the values in this column */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		/* now we have per-key cost, so add to the running total */
+		totalFuncCost += funcCost;
+
+		/* remember if we have found a fake Var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * We need to calculate the number of comparisons for this column,
+		 * which requires knowing the group size. So we estimate the number of
+		 * groups by calling estimate_num_groups_incremental(), which
+		 * estimates the group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars, so
+		 * use a default estimate otherwise.
+		 */
+		if (!has_fake_var)
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+
+			/*
+			 * Use the geometric mean as the estimate when there are no stats.
+			 *
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys are not considered in the cost above, but we still
+		 * do have to compare them in the qsort comparator. So make sure to
+		 * factor in the cost in that case.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				/*
+				 * have to keep at least one group, and a multiple of group
+				 * size
+				 */
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Once we get a single-row group, it means the tuples in the group are
+		 * unique and we can skip all remaining columns.
+		 */
+		if (tuplesPerPrevGroup <= nGroups)
+			break;
+
+		/*
+		 * Uniform distributions with all groups being of the same size are
+		 * the best case, with nice smooth behavior. Real-world distributions
+		 * tend not to be uniform, though, and we don't have any reliable
+		 * easy-to-use information. As a basic defense against skewed
+		 * distributions, we use a 1.5 factor to make the expected group a bit
+		 * larger, but we need to be careful not to make the group larger than
+		 * in the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles
+	 * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number
+	 * of tuples (see Chapter 8.2 and Theorem 4.1). That affects cases with a
+	 * low number of tuples, approximately less than 1e4. We could implement
+	 * it as an additional multiplier under the logarithm, but we use a
+	 * somewhat more complex formula which takes into account the number of
+	 * unique tuples, and it's not clear how to combine the multiplier with
+	 * the number of groups. Estimate it as 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * cost_sort_estimate -- simple wrapper to estimate the cheapest sort cost
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								 0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1832,7 +2153,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1851,9 +2172,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; any input path cost should be
+ * added by the caller afterwards
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1870,9 +2193,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1896,12 +2216,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1917,18 +2235,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -2042,7 +2359,7 @@ cost_incremental_sort(Path *path,
 	 * Estimate the average cost of sorting of one group where presorted keys
 	 * are equal.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -2102,7 +2419,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2200,7 +2517,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2268,7 +2585,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e2..07edd4f38e 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index e53ea84224..2143b39873 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,17 +17,24 @@
  */
 #include "postgres.h"
 
+#include <float.h>
+
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +357,548 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * The function builds new lists (pathkeys and clauses); the original GROUP BY
+ * lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* abort on the first pathkey without a matching GROUP BY key */
+			break;
+
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
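The matching logic above boils down to finding the longest prefix of the input path's ordering that is covered by GROUP BY keys; a simplified sketch with integers standing in for PathKey pointers (names hypothetical):

```c
#include <assert.h>
#include <stdbool.h>

/* Count how many leading path keys have a matching group key, stopping at
 * the first miss - pathkeys after the first mismatch can't help evaluate
 * the grouping. */
static int
matched_prefix_len(const int *pathkeys, int npathkeys,
                   const int *groupkeys, int ngroupkeys)
{
    int n = 0;

    for (int i = 0; i < npathkeys && n < ngroupkeys; i++)
    {
        bool found = false;

        for (int j = 0; j < ngroupkeys; j++)
        {
            if (groupkeys[j] == pathkeys[i])
            {
                found = true;
                break;
            }
        }
        if (!found)
            break;
        n++;
    }
    return n;
}
```

So with input ordering (3, 1, 7) and GROUP BY keys {1, 2, 3}, the first two pathkeys count as presorted and only the remaining group keys need sorting.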
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState
+{
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			mutatorNColumns;
+	int			count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may
+ * want to skip some elements at the beginning (when they are treated as
+ * presorted) or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int			i;
+	int			n = end - start;
+	ListCell   *lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void *) * n);
+	state->elemCells = palloc(sizeof(ListCell *) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+	int			s = a[i];
+
+	a[i] = a[j];
+	a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int			j,
+				k,
+				l,
+				r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k >= 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
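PathkeyMutatorNextSet() is the classic lexicographic "next permutation" step; a stand-alone version (renamed for this demo) enumerates all n! orderings when started from the sorted array:

```c
#include <assert.h>
#include <stdbool.h>

static void
swap_int(int *a, int i, int j)
{
    int s = a[i];

    a[i] = a[j];
    a[j] = s;
}

/* Advance to the next lexicographic permutation; false when exhausted. */
static bool
next_permutation(int *a, int n)
{
    int j = n - 2, k = n - 1, l, r;

    while (j >= 0 && a[j] >= a[j + 1])
        j--;
    if (j < 0)
        return false;
    while (k >= 0 && a[j] >= a[k])
        k--;
    swap_int(a, j, k);
    for (l = j + 1, r = n - 1; l < r; l++, r--)
        swap_int(a, l, r);
    return true;
}

/* Count permutations reachable from the sorted order {1, 2, ..., n}. */
static int
count_permutations(int n)
{
    int a[8];
    int count = 1;

    for (int i = 0; i < n; i++)
        a[i] = i + 1;
    while (next_permutation(a, n))
        count++;
    return count;
}
```

Starting from {1, 2, 3} the generator visits {1, 3, 2}, {2, 1, 3}, {2, 3, 1}, {3, 1, 2}, {3, 2, 1} and then stops, which is why the patch caps nToPermute at 4 (4! = 24 candidate orderings).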
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int			i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[state->positions[i] - 1];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey    *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Reorders the group pathkeys / clauses to minimize the comparison cost.
+ *
+ * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
+ * in an order that minimizes the sort costs that will be incurred by the
+ * GROUP BY.  The costs mainly depend on the cost of the sort comparator
+ * function(s) and the number of distinct values in each column of the GROUP
+ * BY clause (*group_clauses).  Sorting on subsequent columns is only required
+ * for tiebreak situations where two values sort equally.
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.  'n_preordered' denotes how many of the leading *group_pathkeys
+ * the input is presorted by.
+ *
+ * Returns true and sets *group_pathkeys and *group_clauses to the newly
+ * ordered versions of the lists that were passed in via these parameters.
+ * If no reordering was deemed necessary then we return false, in which case
+ * the *group_pathkeys and *group_clauses lists are left untouched. The
+ * original *group_pathkeys and *group_clauses parameter values are never
+ * destructively modified in place.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List				   *new_group_pathkeys = NIL;
+	List				   *new_group_clauses = NIL;
+	List				   *var_group_pathkeys;
+	ListCell			   *cell;
+	PathkeyMutatorState		mstate;
+	double					cheapest_sort_cost = DBL_MAX;
+	int						nFreeKeys;
+	int						nToPermute;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (root->num_groupby_pathkeys - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys it might be prohibitively expensive. So
+	 * we try to apply simple cheap heuristics first - we sort the pathkeys by
+	 * sort cost (as if the pathkey was sorted independently) and then check
+	 * only the four cheapest pathkeys. The remaining pathkeys are kept
+	 * ordered by cost.
+	 *
+	 * XXX This is a very simple heuristic, but likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than
+	 * 4). But it ignores how the number of distinct values in each pathkey
+	 * affects the following steps. It might be better to use a "more
+	 * expensive" pathkey first if it has many distinct values, because that
+	 * limits the number of comparisons for the remaining pathkeys. But
+	 * evaluating that is likely quite expensive.
+	 */
+	nFreeKeys = root->num_groupby_pathkeys - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+		PathkeySortCost *cost = costs;
+		int i = 0;
+
+		/*
+		 * Estimate cost for sorting individual pathkeys skipping the
+		 * pre-ordered pathkeys.
+		 */
+		for_each_from(cell, *group_pathkeys, n_preordered)
+		{
+			PathKey    *pathkey = (PathKey *) lfirst(cell);
+			List	   *to_cost;
+
+			if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+				break;
+
+			i++;
+			to_cost = list_make1(pathkey);
+			cost->pathkey = pathkey;
+			cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
+			cost++;
+
+			list_free(to_cost);
+		}
+
+		Assert(i == nFreeKeys);
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
+
+		for (int i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		new_group_pathkeys = list_concat(new_group_pathkeys,
+										 list_copy_tail(*group_pathkeys,
+														root->num_groupby_pathkeys));
+		pfree(costs);
+	}
+	else
+	{
+		/* Copy the list, so that we can free the new list by list_free. */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute
+	 * pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly,
+	 * we may pass the current "best" cost to cost_sort_estimate so that it
+	 * can "abort" once the estimated cost exceeds it. Secondly, it could
+	 * return information about the position at which it exceeded the cost,
+	 * and we could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found an ordering with cost C1, and we're
+	 * evaluating another ordering - cost_sort_estimate() calculates the cost
+	 * by adding the pathkeys one by one (more or less), and the cost only
+	 * grows. If at any point it exceeds C1, the ordering can't possibly be
+	 * "better", so we can discard it. But we can also discard all orderings
+	 * with the same prefix, because if we're estimating (a,b,c,d) and we
+	 * exceed C1 at (a,b), the same thing will happen for any ordering with
+	 * this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered,
+					   n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost		cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(root->group_pathkeys));
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(cell);
+
+		if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+			break;
+
+		new_group_clauses =
+			lappend(new_group_clauses,
+					get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+	Assert(list_length(new_group_clauses) == list_length(root->processed_groupClause));
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		Assert(list_length(pathkeys) == list_length(clauses));
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match ordering of the input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, this time matching
+	 * the target ORDER BY clause as much as possible.
+	 */
+	if (root->sort_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ORDER BY ordering */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1937,6 +2486,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys can be reordered to benefit from the path's ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count the prefix of pathkeys with a matching group key,
+ * and stop at the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering becomes useful for
+ * grouping only after the group keys are reordered.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1951,6 +2548,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1986,6 +2586,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 44efb1f4eb..e4d3273f80 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2810,8 +2810,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -3468,6 +3469,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   tlist,
 												   true,
 												   &sortable);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -6842,90 +6844,108 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6937,69 +6957,84 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
-				bool		is_sorted;
-				int			presorted_keys;
+				List	   *pathkey_orderings = NIL;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (!is_sorted)
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
 				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+					bool			is_sorted;
+					int				presorted_keys;
+					PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+					if (!is_sorted)
+					{
+						/*
+						 * Try at least sorting the cheapest path and also try
+						 * incrementally sorting any path which is partially
+						 * sorted already (no need to deal with paths which have
+						 * presorted keys when incremental sort is disabled unless
+						 * it's the cheapest input path).
+						 */
+						if (path != partially_grouped_rel->cheapest_total_path &&
+							(presorted_keys == 0 || !enable_incremental_sort))
+							continue;
+
+						/*
+						 * We've no need to consider both a sort and incremental
+						 * sort.  We'll just do a sort if there are no pre-sorted
+						 * keys and an incremental sort when there are presorted
+						 * keys.
+						 */
+						if (presorted_keys == 0 || !enable_incremental_sort)
+							path = (Path *) create_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 -1.0);
+						else
+							path = (Path *) create_incremental_sort_path(root,
+																		 grouped_rel,
+																		 path,
+																		 info->pathkeys,
+																		 presorted_keys,
+																		 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7202,66 +7237,85 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_total_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7270,67 +7324,86 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				bool		is_sorted;
+				int			presorted_keys;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_partial_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7444,6 +7517,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f123fcb41e..c72ea08447 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root,
 			pathnode->path.total_cost = child->total_cost;
 		}
 		else
-			cost_append(pathnode);
+			cost_append(pathnode, root);
 		/* Must do this last, else cost_append complains */
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c4fcd0076e..cb7e67099e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3386,11 +3386,28 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ *		An estimate_num_groups variant, optimized for cases that are adding the
+ *		expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+								double input_rows,
+								List **pgset, EstimationInfo *estinfo,
+								List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3421,7 +3438,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3430,6 +3447,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* was done on previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++;			/* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3499,7 +3524,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3516,6 +3545,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f9dba43b8c..1c10ab5a7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1025,6 +1025,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index c768af9a73..671b88fc60 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -393,6 +393,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c17b53f7ad..eeeafd8e76 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 6cf49705d3..28175d3472 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -116,7 +116,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 50bc3b503a..e4ff16d0c4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -205,6 +206,12 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int	group_keys_reorder_by_pathkeys(List *pathkeys,
+										   List **group_pathkeys,
+										   List **group_clauses,
+										   int num_groupby_pathkeys);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 2f76c473db..2b21c06cc9 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+											  double input_rows, List **pgset,
+											  EstimationInfo *estinfo,
+											  List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index d8271da4d1..205be28b3d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1234,7 +1234,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1257,10 +1258,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2715,6 +2714,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, v, c
+   ->  Sort
+         Sort Key: p, d, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v, d
+   ->  Incremental Sort
+         Sort Key: p, c, v, d
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 7fdb685313..b8a0968b06 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1426,7 +1426,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9b8638f286..27684bfce2 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -5645,18 +5645,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- join removal is not possible here
 explain (costs off)
@@ -7597,44 +7599,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index 133d42117c..f7c4cc4833 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1459,18 +1459,15 @@ WHEN MATCHED AND t.a < 10 THEN
                            explain_merge                            
 --------------------------------------------------------------------
  Merge on ex_mtarget t (actual rows=0 loops=1)
-   ->  Merge Join (actual rows=0 loops=1)
-         Merge Cond: (t.a = s.a)
-         ->  Sort (actual rows=0 loops=1)
-               Sort Key: t.a
-               Sort Method: quicksort  Memory: xxx
+   ->  Hash Join (actual rows=0 loops=1)
+         Hash Cond: (s.a = t.a)
+         ->  Seq Scan on ex_msource s (actual rows=1 loops=1)
+         ->  Hash (actual rows=0 loops=1)
+               Buckets: xxx  Batches: xxx  Memory Usage: xxx
                ->  Seq Scan on ex_mtarget t (actual rows=0 loops=1)
                      Filter: (a < '-1000'::integer)
                      Rows Removed by Filter: 54
-         ->  Sort (never executed)
-               Sort Key: s.a
-               ->  Seq Scan on ex_msource s (never executed)
-(12 rows)
+(9 rows)
 
 DROP TABLE ex_msource, ex_mtarget;
 DROP FUNCTION explain_merge(text);
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8..800114dfde 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -948,12 +948,12 @@ SET parallel_setup_cost = 0;
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
          ->  Parallel Append
                ->  GroupAggregate
                      Group Key: pagg_tab_ml.a
@@ -1380,28 +1380,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 762cc8e53b..a1add421f5 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.a, prt1_3.b
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.a, p2_3.b
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 001c6e7eb9..d5e9a35096 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -132,7 +133,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(21 rows)
+(22 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index e2613d6777..220f0c1629 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 75c78be640..5c4e768825 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1172,6 +1172,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index ab471bdfff..70f311a6f2 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.41.0

#88Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey Lepikhov (#87)
Re: POC: GROUP BY optimization

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means we'll
miss some optimization opportunities, but it should limit the risk.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#89Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#88)
Re: POC: GROUP BY optimization

On 20/7/2023 18:46, Tomas Vondra wrote:

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means we'll
miss some optimization opportunities, but it should limit the risk.

For me personally, the most challenging issue is:
3. Imbalance induced by the cost_sort() changes. It may increase or
decrease the contribution of sorting to the total cost compared to other
factors and change the choice of sorted paths.

--
regards,
Andrey Lepikhov
Postgres Professional

#90Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#88)
Re: POC: GROUP BY optimization

On 20/7/2023 18:46, Tomas Vondra wrote:

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means we'll
miss some optimization opportunities, but it should limit the risk.

I read up on the history of this thread.
As I see it, all the problems mentioned above can be avoided by dropping
the new cost model entirely. We can simply sort GROUP BY columns
according to the 'ndistinct' value.
I see the reason for introducing the cost model in [1]. The main
supporting point there is that with this patch, people couldn't optimize
the query by themselves by organizing the order of the columns in a more
optimal way. But now we have at least the GUC to switch off the
behaviour introduced here. Also, some extensions, like the well-known
pg_hint_plan, can help with automation.
So, how about committing the undoubted part of the feature and working
on the cost model in a new thread?

[1]: /messages/by-id/6d1e0cdb-dde3-f62a-43e2-e90bbd9b0f42@2ndquadrant.com

--
regards,
Andrey Lepikhov
Postgres Professional

#91Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey Lepikhov (#90)
Re: POC: GROUP BY optimization

On 7/24/23 04:10, Andrey Lepikhov wrote:

On 20/7/2023 18:46, Tomas Vondra wrote:

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means we'll
miss some optimization opportunities, but it should limit the risk.

I read up on the history of this thread.
As I see, all the problems mentioned above can be beaten by excluding
the new cost model at all. We can sort GROUP BY columns according to the
'ndistinct' value.
I see the reason for introducing the cost model in [1]. The main
supporting point here is that with this patch, people couldn't optimize
the query by themselves, organizing the order of the columns in a more
optimal way. But now we have at least the GUC to switch off the
behaviour introduced here. Also, some extensions, like the well-known
pg_hint_plan, can help with automation.

I think the main concern is that if we reorder the group keys and get it
wrong, it's a regression. If that happens often (due to costing based on
poor stats), it's a problem. Yes, there's a GUC, but that's a rather
blunt instrument, unfortunately.

So, how about committing of the undoubted part of the feature and
working on the cost model in a new thread?

But Tom's commit message says this:

Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.

so I'm not sure this really counts as "undoubted".

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#92Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#91)
Re: POC: GROUP BY optimization

On 24/7/2023 16:56, Tomas Vondra wrote:

On 7/24/23 04:10, Andrey Lepikhov wrote:

On 20/7/2023 18:46, Tomas Vondra wrote:

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means we'll
miss some optimization opportunities, but it should limit the risk.

I read up on the history of this thread.
As I see, all the problems mentioned above can be beaten by excluding
the new cost model at all. We can sort GROUP BY columns according to the
'ndistinct' value.
I see the reason for introducing the cost model in [1]. The main
supporting point here is that with this patch, people couldn't optimize
the query by themselves, organizing the order of the columns in a more
optimal way. But now we have at least the GUC to switch off the
behaviour introduced here. Also, some extensions, like the well-known
pg_hint_plan, can help with automation.

I think the main concern is that if we reorder the group keys and get it
wrong, it's a regression. If that happens often (due to costing based on
poor stats), it's a problem. Yes, there's a GUC, but that's a rather
blunt instrument, unfortunately.

I see. My point here is that if the ndistinct of one column is much
higher than the ndistinct of another, it is more probable that this
correlation will be kept in the grouping phase. Here we can get some
regressions, which can be outweighed by the possibility below.

So, how about committing of the undoubted part of the feature and
working on the cost model in a new thread?

But Tom's commit message says this:

Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.

so I'm not sure this really counts as "undoubted".

Don't try to estimate multiple columns - just sort columns according to
the value of ndistinct as a heuristic.
I think we should estimate the number of values of multiple columns only
if we have extended statistics on these columns. And this can extend the
applicability of extended statistics.
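A minimal sketch of that heuristic, for illustration only (the column
names and ndistinct figures are invented; the actual patch reorders
PathKey lists using the planner's statistics, not plain tuples):

```python
# Hedged sketch of the per-column ndistinct heuristic discussed above.
# The (name, ndistinct) pairs are invented; in PostgreSQL the values
# would come from per-column statistics such as pg_stats.n_distinct.

def reorder_group_by(columns):
    """Order GROUP BY columns by estimated ndistinct, descending, so the
    most selective key is compared first and later keys are rarely reached."""
    return sorted(columns, key=lambda col: col[1], reverse=True)

cols = [("v", 2), ("p", 5000), ("c", 2500), ("d", 1250)]
print(reorder_group_by(cols))
# a sort on (p, c, d, v) resolves most comparisons on the first key
```

This is exactly the simplicity being debated: no joint statistics, no
cost model, just one sort key per column.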

The suggestion above is just an attempt to gather low-hanging fruit
right now. If it is not applicable, we will have to go the long way.

--
regards,
Andrey Lepikhov
Postgres Professional

#93Tomas Vondra
tomas.vondra@enterprisedb.com
In reply to: Andrey Lepikhov (#92)
Re: POC: GROUP BY optimization

On 7/24/23 14:04, Andrey Lepikhov wrote:

On 24/7/2023 16:56, Tomas Vondra wrote:

On 7/24/23 04:10, Andrey Lepikhov wrote:

On 20/7/2023 18:46, Tomas Vondra wrote:

On 7/20/23 08:37, Andrey Lepikhov wrote:

On 3/10/2022 21:56, Tom Lane wrote:

Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
several follow-on fixes.
...
Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

It may be time to restart the project. As a first step, I rebased the
patch on the current master. It wasn't trivial because of some latest
optimizations (a29eab, 1349d27 and 8d83a5d).
Now, Let's repeat the review and rewrite the current path according to
the reasons uttered in the revert commit.

I think the fundamental task is to make the costing more reliable, and
the commit message 443df6e2db points out a couple challenges in this
area. Not sure how feasible it is to address enough of them ...

1) procost = 1.0 - I guess we could make this more realistic by doing
some microbenchmarks and tuning the costs for the most expensive cases.

2) estimating quicksort comparisons - This relies on ndistinct
estimates, and I'm not sure how much more reliable we can make those.
Probably not much :-( Not sure what to do about this, the only thing I
can think of is to track "reliability" of the estimates and only do the
reordering if we have high confidence in the estimates. That means
we'll
miss some optimization opportunities, but it should limit the risk.

I read up on the history of this thread.
As I see, all the problems mentioned above can be beaten by excluding
the new cost model at all. We can sort GROUP BY columns according to the
'ndistinct' value.
I see the reason for introducing the cost model in [1]. The main
supporting point here is that with this patch, people couldn't optimize
the query by themselves, organizing the order of the columns in a more
optimal way. But now we have at least the GUC to switch off the
behaviour introduced here. Also, some extensions, like the well-known
pg_hint_plan, can help with automation.

I think the main concern is that if we reorder the group keys and get it
wrong, it's a regression. If that happens often (due to costing based on
poor stats), it's a problem. Yes, there's a GUC, but that's a rather
blunt instrument, unfortunately.

I see. My point here is if the ndistinct of one column is much more than
the ndistinct of another, it is more probable that this correlation will
be kept in the grouping phase. Here we can get some regression, which
can be overweighed by the possibility below.

I think the word "probable" hits what the problem is. Because what if it
isn't? Also, we don't actually need ndistinct for individual attributes,
but for the groups of attributes. Imagine you do

GROUP BY a, b

so you need to estimate whether to do (a,b) or (b,a). You need to calculate

ndistinct(a,b) / ndistinct(a)

and

ndistinct(b,a) / ndistinct(b)

And similarly for more grouping keys. This is why we have ndistinct
extended statistics, mostly.
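As a hedged numeric sketch of why the prefix-group counts matter (all
figures invented, and the model is a deliberately simplified stand-in
for the patch's costing): comparisons on a later column only happen
inside ties of the earlier columns, so the column order changes the
total comparison count.

```python
from math import log2

def est_comparisons(n_tuples, prefix_ndistinct):
    """Rough sort-comparison estimate with unit comparison cost.
    prefix_ndistinct[k] is ndistinct of the first k+1 key columns;
    comparisons on column k are confined to groups formed by the first
    k columns, contributing roughly N * log2(N / groups_so_far)."""
    total, groups = 0.0, 1
    for nd in prefix_ndistinct:
        total += n_tuples * log2(max(n_tuples / groups, 2))
        groups = nd
    return total

N = 10_000
nd_a, nd_b, nd_ab = 100, 4, 2500   # invented per-column and joint ndistinct

cost_ab = est_comparisons(N, [nd_a, nd_ab])  # ordering (a, b)
cost_ba = est_comparisons(N, [nd_b, nd_ab])  # ordering (b, a)
print(cost_ab < cost_ba)  # leading with the higher-ndistinct column is cheaper
```

With two keys the first column's ndistinct dominates; the joint
ndistinct(a,b) would govern the cost of any third key, which is where
the extended-statistics estimates come in.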

So, how about committing of the undoubted part of the feature and
working on the cost model in a new thread?

But Tom's commit message says this:

     Worse, to arrive at estimates of the number of calls made to the
     lower-order-column comparison functions, the code needs to make
     estimates of the numbers of distinct values of multiple columns,
     which are necessarily even less trustworthy than per-column stats.

so I'm not sure this really counts as "undoubted".

Don't try to estimate multiple  columns - just sort columns according to
the value of ndistinct as a heuristic.

Surely you realize how easily such simple heuristics can fail, right?

I think we should estimate the number of values of multiple columns only
if we have extended statistics on these columns. And this can extend the
applicability of extended statistics.

I don't see why this should estimate ndistinct differently than every
other place. That'd certainly be surprising. I can be convinced, but
there needs to be sound justification for why that's the right thing to
do (i.e. why it's less likely to cause poor planning choices).

The suggestion above is just an attempt to gather low-hanging fruit
right now. If it is not applicable, we will go a long way.

I'm not sure this qualifies as "low-hanging fruit" - that generally
means simple changes with little risk. But you're using it for a rather
simplistic heuristic that can easily misfire (I think). At least that's
my impression; I can be convinced otherwise.

regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#94Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#93)
2 attachment(s)
Re: POC: GROUP BY optimization

Hi,
Here is the patch rebased on the current master. Also, I fixed some
minor slips and one static analyzer warning.
This is just to add it to the next commitfest and resume work on this
patch.

One extra difference in the newly added postgres_fdw tests is caused by
this patch - see the changes in the query plan in the attachment.

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

0001-When-evaluating-a-query-with-a-multi-column-GROUP-BY-20230913.patch (text/plain)
From 33953655c9ac3f9ec64b80c9f2a2ff38bd178745 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 13 Sep 2023 11:20:03 +0700
Subject: [PATCH] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may depend heavily on the order in which the keys are compared when
building the groups. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

In principle, we might generate grouping paths for all permutations of the keys
and leave the rest to the optimizer. But that might get very expensive, so we
try to pick only a couple interesting orderings based on both local and global
information.

When planning the grouping path, we explore statistics (number of distinct
values, cost of the comparison function) for the keys and reorder them
to minimize comparison costs. Intuitively, it may be better to perform more
expensive comparisons (for complex data types, etc.) last because maybe
the cheaper comparisons will be enough. Similarly, the higher the cardinality
of a key, the lower the probability we'll need to compare more keys. The patch
generates and costs various orderings, picking the cheapest ones.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using an incremental
sort or even eliminating the sort entirely.

The patch generates orderings and picks those, minimizing the comparison cost
(for various path keys), and then adds orderings that might be useful for
operations higher up in the plan (ORDER BY, etc.). Finally, it always keeps
the ordering specified in the query, assuming the user might have additional
insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  36 +-
 src/backend/optimizer/path/costsize.c         | 363 ++++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 602 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 465 ++++++++------
 src/backend/optimizer/util/pathnode.c         |   4 +-
 src/backend/utils/adt/selfuncs.c              |  38 +-
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |   7 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 src/test/regress/expected/merge.out           |  15 +-
 .../regress/expected/partition_aggregate.out  |  44 +-
 src/test/regress/expected/partition_join.out  |  75 +--
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 23 files changed, 1771 insertions(+), 382 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 144c114d0f..63af7feabe 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2319,18 +2319,21 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
 -- join with pseudoconstant quals
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1 AND CURRENT_USER = SESSION_USER) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                           
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                  QUERY PLAN                                                                   
+-----------------------------------------------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.c1, t2.c1, t1.c3
-   ->  Result
+   ->  Sort
          Output: t1.c1, t2.c1, t1.c3
-         One-Time Filter: (CURRENT_USER = SESSION_USER)
-         ->  Foreign Scan
-               Output: t1.c1, t1.c3, t2.c1
-               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
-(9 rows)
+         Sort Key: t1.c3, t1.c1
+         ->  Result
+               Output: t1.c1, t2.c1, t1.c3
+               One-Time Filter: (CURRENT_USER = SESSION_USER)
+               ->  Foreign Scan
+                     Output: t1.c1, t1.c3, t2.c1
+                     Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+                     Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
+(12 rows)
 
 -- non-Var items in targetlist of the nullable rel of a join preventing
 -- push-down in some cases
@@ -9609,13 +9612,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J
 -- left outer join + nullable clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
-                                                                                                                     QUERY PLAN                                                                                                                     
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                    QUERY PLAN                                                                                     
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
    Output: t1.a, fprt2.b, fprt2.c
-   Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
-   Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
-(4 rows)
+   Sort Key: t1.a, fprt2.b, fprt2.c
+   ->  Foreign Scan
+         Output: t1.a, fprt2.b, fprt2.c
+         Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
+         Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))
+(7 rows)
 
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
  a | b |  c   
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d6ceafd51c..6f11f679ba 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1816,6 +1816,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist() which generates fake Vars with
+ *		varno == 0, which will cause a failure in the estimate_num_groups() call
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea is that the comparison becomes more expensive the longer the
+ * value is. The return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double		width = -1.0;	/* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		   *var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int			ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, try using type width instead. */
+	if (width < 0.0)
+	{
+		Node	   *node = (Node *) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Values are passed as Datum type, so comparisons can't be cheaper than
+	 * comparing a Datum value.
+	 *
+	 * FIXME I find this reasoning questionable. We may pass int2, and
+	 * comparing it is probably a bit cheaper than comparing a bigint.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * We consider the cost of a comparison not to be directly proportional to
+	 * width of the argument, because widths of the arguments could be
+	 * slightly different (we only know the average width for the whole
+	 * column). So we use log16(width) as an estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because we don't have any estimate of the
+ * group sizes - we only know the estimated number of groups (distinct
+ * values). In that case, the formula becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by k columns, and
+ * Fk the cost of the comparison for column k; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, it will fall back
+ * to a simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell   *lc;
+	List	   *pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	List	   *cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort. Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account the per-column
+	 * comparison function cost.  We try to compute the needed number of
+	 * comparisons per column.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceMember *em;
+		double		nGroups,
+					correctedNGroups;
+		Cost		funcCost = 1.0;
+
+		/*
+		 * We believe that equivalence members aren't very different, so to
+		 * estimate the cost we consider just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not look up funcCost again if the data type is unchanged */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+				/*
+				 * add_function_cost returns the product of cpu_operator_cost
+				 * and procost, but we need just procost, so undo that.
+				 */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+
+				prev_datatype = em->em_datatype;
+			}
+		}
+
+		/* factor in the width of the values in this column */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		/* now we have per-key cost, so add to the running total */
+		totalFuncCost += funcCost;
+
+		/* remember if we have found a fake Var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * We need to calculate the number of comparisons for this column,
+		 * which requires knowing the group size. So we estimate the number of
+		 * groups by calling estimate_num_groups_incremental(), which
+		 * estimates the group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars,
+		 * so fall back to a default estimate once a fake Var is present.
+		 */
+		if (!has_fake_var)
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+
+			/*
+			 * Use geometric mean as estimation if there are no stats.
+			 *
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys are not considered in the cost above, but we still
+		 * do have to compare them in the qsort comparator. So make sure to
+		 * factor in the cost in that case.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				/*
+				 * have to keep at least one group, and a multiple of group
+				 * size
+				 */
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Once we reach single-row groups, the tuples within each group are
+		 * unique and we can skip all remaining columns.
+		 */
+		if (tuplesPerPrevGroup <= nGroups)
+			break;
+
+		/*
+		 * Uniform distributions with all groups being of the same size are
+		 * the best case, with nice smooth behavior. Real-world distributions
+		 * tend not to be uniform, though, and we don't have any reliable
+		 * easy-to-use information. As a basic defense against skewed
+		 * distributions, we use a 1.5 factor to make the expected group a bit
+		 * larger, but we need to be careful not to make the group larger than
+		 * in the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles
+	 * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number
+	 * of tuples (see Chapter 8.2 and Theorem 4.1). That matters for cases
+	 * with a low number of tuples, approximately less than 1e4. We could
+	 * implement it as an additional multiplier under the logarithm, but we
+	 * use a somewhat more complex formula that takes the number of unique
+	 * tuples into account, and it's not clear how to combine that multiplier
+	 * with the number of groups. So just estimate it as 10 cpu_operator_cost
+	 * units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper around compute_cpu_sort_cost() to estimate the cost of
+ * sorting by the given pathkeys.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								 0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1832,7 +2153,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ * 		and cpu cost (computed by compute_cpu_sort_cost()).
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1851,9 +2172,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' covers only the cost of the sort itself; the cost of the
+ * input path, if any, must be added by the caller afterwards.
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1870,9 +2193,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1896,12 +2216,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1917,18 +2235,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -2042,7 +2359,7 @@ cost_incremental_sort(Path *path,
 	 * Estimate the average cost of sorting of one group where presorted keys
 	 * are equal.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -2102,7 +2419,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2200,7 +2517,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2268,7 +2585,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e2..07edd4f38e 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the EC was constructed from a WHERE clause, in which case
+				 * it has no target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index fdb60aaa8d..2dfc983a6d 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,17 +17,24 @@
  */
 #include "postgres.h"
 
+#include <float.h>
+
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +357,548 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * The function returns new lists (pathkeys and clauses); the original GROUP BY
+ * lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* abort on the first pathkey without a matching GROUP BY key */
+			break;
+
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clauses do not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState
+{
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			mutatorNColumns;
+	int			count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (when treating as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int			i;
+	int			n = end - start;
+	ListCell   *lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void *) * n);
+	state->elemCells = palloc(sizeof(ListCell *) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+	int			s = a[i];
+
+	a[i] = a[j];
+	a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int			j,
+				k,
+				l,
+				r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k > 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int			i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[state->positions[i] - 1];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey    *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Reorders the group pathkeys / clauses to minimize the comparison cost.
+ *
+ * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
+ * in an order that minimizes the sort costs that will be incurred by the
+ * GROUP BY.  The costs mainly depend on the cost of the sort comparator
+ * function(s) and the number of distinct values in each column of the GROUP
+ * BY clause (*group_clauses).  Sorting on subsequent columns is only required
+ * for tiebreak situations where two values sort equally.
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.  'n_preordered' denotes how many of the leading *group_pathkeys
+ * the input is presorted by.
+ *
+ * Returns true and sets *group_pathkeys and *group_clauses to the newly
+ * ordered versions of the lists that were passed in via these parameters.
+ * If no reordering was deemed necessary then we return false, in which case
+ * the *group_pathkeys and *group_clauses lists are left untouched. The
+ * original *group_pathkeys and *group_clauses parameter values are never
+ * destructively modified in place.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List				   *new_group_pathkeys = NIL;
+	List				   *new_group_clauses = NIL;
+	List				   *var_group_pathkeys;
+	ListCell			   *cell;
+	PathkeyMutatorState		mstate;
+	double					cheapest_sort_cost = DBL_MAX;
+	int						nFreeKeys;
+	int						nToPermute;
+
+	/* If there are less than 2 unsorted pathkeys, we're done. */
+	if (root->num_groupby_pathkeys - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys it might be prohibitively expensive. So
+	 * we try to apply simple cheap heuristics first - we sort the pathkeys by
+	 * sort cost (as if the pathkey was sorted independently) and then check
+	 * only the four cheapest pathkeys. The remaining pathkeys are kept
+	 * ordered by cost.
+	 *
+	 * XXX This is a very simple heuristic, but likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than
+	 * 4). But it ignores how the number of distinct values in each pathkey
+	 * affects the following steps. It might be better to use a "more
+	 * expensive" pathkey first if it has many distinct values, because that
+	 * limits the number of comparisons for the remaining pathkeys. But
+	 * evaluating that is likely quite expensive.
+	 */
+	nFreeKeys = root->num_groupby_pathkeys - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+		PathkeySortCost *cost = costs;
+		int i = 0;
+
+		/*
+		 * Estimate cost for sorting individual pathkeys skipping the
+		 * pre-ordered pathkeys.
+		 */
+		for_each_from(cell, *group_pathkeys, n_preordered)
+		{
+			PathKey    *pathkey = (PathKey *) lfirst(cell);
+			List	   *to_cost;
+
+			if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+				break;
+
+			i++;
+			to_cost = list_make1(pathkey);
+			cost->pathkey = pathkey;
+			cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
+			cost++;
+
+			list_free(to_cost);
+		}
+
+		Assert(i == nFreeKeys);
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
+
+		for (int i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		/* append any pathkeys beyond the GROUP BY keys */
+		new_group_pathkeys = list_concat(new_group_pathkeys,
+										 list_copy_tail(*group_pathkeys,
+														root->num_groupby_pathkeys));
+		pfree(costs);
+	}
+	else
+	{
+		/* Copy the list, so that we can free the new list by list_free. */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute
+	 * pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly,
+	 * we may pass the current "best" cost to cost_sort_estimate so that it
+	 * can "abort" if the estimated pathkeys list exceeds it. Secondly, it
+	 * could pass the return information about the position when it exceeded
+	 * the cost, and we could skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better" so we can discard
+	 * it. But we also know that we can discard all ordering with the same
+	 * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
+	 * then the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered,
+					   n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost		cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(root->group_pathkeys));
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(cell);
+
+		if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+			break;
+
+		new_group_clauses =
+			lappend(new_group_clauses,
+					get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											*group_clauses));
+	}
+
+	/* Just append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+	Assert(list_length(new_group_clauses) == list_length(root->processed_groupClause));
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		Assert(list_length(pathkeys) == list_length(clauses));
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * This is not safe with grouping sets (which use more complex logic to
+	 * decide the ordering), but those were already rejected above.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * Reorder the tail to minimize sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ordering of the path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering the group keys to match the target ORDER BY clause as
+	 * much as possible, then reorder the tail to minimize the sort cost.
+	 */
+	if (root->sort_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2488,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering doesn't match the
+ * grouping keys directly, but could do so after reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2550,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2588,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 44efb1f4eb..4a7c9d9440 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2810,8 +2810,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -3468,6 +3469,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   tlist,
 												   true,
 												   &sortable);
+
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -6842,90 +6844,108 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6937,69 +6957,84 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (!is_sorted)
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
 				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+					bool			is_sorted;
+					int				presorted_keys;
+					PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+					if (!is_sorted)
+					{
+						/*
+						 * Try at least sorting the cheapest path and also try
+						 * incrementally sorting any path which is partially
+						 * sorted already (no need to deal with paths which have
+						 * presorted keys when incremental sort is disabled unless
+						 * it's the cheapest input path).
+						 */
+						if (path != partially_grouped_rel->cheapest_total_path &&
+							(presorted_keys == 0 || !enable_incremental_sort))
+							continue;
+
+						/*
+						 * We've no need to consider both a sort and incremental
+						 * sort.  We'll just do a sort if there are no pre-sorted
+						 * keys and an incremental sort when there are presorted
+						 * keys.
+						 */
+						if (presorted_keys == 0 || !enable_incremental_sort)
+							path = (Path *) create_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 -1.0);
+						else
+							path = (Path *) create_incremental_sort_path(root,
+																		 grouped_rel,
+																		 path,
+																		 info->pathkeys,
+																		 presorted_keys,
+																		 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7202,66 +7237,85 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_total_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7270,67 +7324,86 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool		is_sorted;
+				int			presorted_keys;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_partial_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7444,6 +7517,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
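
The cost intuition behind trying several orderings (point 1 in the opening mail) can be made concrete. Under an independence assumption, a sort comparator only evaluates column i+1 when all earlier columns tie, so putting high-cardinality columns first reduces the expected number of columns examined per comparison. A hypothetical sketch, not planner code:

```python
def expected_cols_compared(ndistincts):
    """Expected number of key columns evaluated per comparison,
    assuming independent, uniformly distributed columns: a column is
    only reached when every column before it compares equal."""
    p_tie = 1.0          # probability all previous columns tie
    total = 0.0
    for nd in ndistincts:
        total += p_tie   # this column is evaluated whenever reached
        p_tie /= nd      # two random values tie with probability ~1/nd
    return total
```

For two columns with 1000 and 2 distinct values, the ordering (1000, 2) evaluates about 1.001 columns per comparison versus 1.5 for (2, 1000), consistent with the performance difference reported for the demonstration queries.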
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 211ba65389..7b8d0cd294 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root,
 			pathnode->path.total_cost = child->total_cost;
 		}
 		else
-			cost_append(pathnode);
+			cost_append(pathnode, root);
 		/* Must do this last, else cost_append complains */
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c4fcd0076e..cb7e67099e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3386,11 +3386,28 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ *		A variant of estimate_num_groups(), optimized for cases that add
+ *		grouping expressions incrementally (e.g. one by one), reusing the
+ *		per-column information cached in *cache_varinfos across calls.
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+								double input_rows,
+								List **pgset, EstimationInfo *estinfo,
+								List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3421,7 +3438,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3430,6 +3447,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* skip expressions already processed on a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++;			/* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3499,7 +3524,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3516,6 +3545,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index bdb26e2b77..385f160ff8 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1026,6 +1026,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 6bb39d39ba..751511745a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -391,6 +391,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 5702fbba60..15ba2f8218 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index bee090ffc2..bbb1132473 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -116,7 +116,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 50bc3b503a..e4ff16d0c4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -205,6 +206,12 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int	group_keys_reorder_by_pathkeys(List *pathkeys,
+										   List **group_pathkeys,
+										   List **group_clauses,
+										   int num_groupby_pathkeys);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 2f76c473db..2b21c06cc9 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+											  double input_rows, List **pgset,
+											  EstimationInfo *estinfo,
+											  List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
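
The caching contract of estimate_num_groups_incremental() — examine only the expressions added since the previous call, carrying per-column info across calls — can be sketched as follows. Multiplying per-column distinct counts is a deliberate simplification of the real estimator (which clamps against input_rows and uses extended statistics); all names here are illustrative:

```python
def estimate_groups_incremental(exprs, ndistinct, cache, prev_n):
    """Process only exprs[prev_n:], reusing cached per-column info.

    Returns (estimate, cache); the caller passes the cache and the old
    expression count back in on the next call, as the planner does while
    growing a candidate GROUP BY prefix one key at a time.
    """
    for e in exprs[prev_n:]:          # skip work done on earlier calls
        cache.append(ndistinct[e])
    est = 1.0
    for nd in cache:                  # naive: product of distinct counts
        est *= nd
    return est, cache
```

This avoids re-deriving statistics for the shared prefix each time one more grouping column is considered, which is the overhead point 2 of the opening mail complains about.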
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index d8271da4d1..205be28b3d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1234,7 +1234,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1257,10 +1258,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2715,6 +2714,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg = off;
+SET max_parallel_workers = 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, v, c
+   ->  Sort
+         Sort Key: p, d, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reordering columns to match an index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan = off;
+SET enable_bitmapscan = off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v, d
+   ->  Incremental Sort
+         Sort Key: p, c, v, d
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 7fdb685313..b8a0968b06 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1426,7 +1426,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9b8638f286..27684bfce2 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -5645,18 +5645,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- join removal is not possible here
 explain (costs off)
@@ -7597,44 +7599,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index 133d42117c..f7c4cc4833 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1459,18 +1459,15 @@ WHEN MATCHED AND t.a < 10 THEN
                            explain_merge                            
 --------------------------------------------------------------------
  Merge on ex_mtarget t (actual rows=0 loops=1)
-   ->  Merge Join (actual rows=0 loops=1)
-         Merge Cond: (t.a = s.a)
-         ->  Sort (actual rows=0 loops=1)
-               Sort Key: t.a
-               Sort Method: quicksort  Memory: xxx
+   ->  Hash Join (actual rows=0 loops=1)
+         Hash Cond: (s.a = t.a)
+         ->  Seq Scan on ex_msource s (actual rows=1 loops=1)
+         ->  Hash (actual rows=0 loops=1)
+               Buckets: xxx  Batches: xxx  Memory Usage: xxx
                ->  Seq Scan on ex_mtarget t (actual rows=0 loops=1)
                      Filter: (a < '-1000'::integer)
                      Rows Removed by Filter: 54
-         ->  Sort (never executed)
-               Sort Key: s.a
-               ->  Seq Scan on ex_msource s (never executed)
-(12 rows)
+(9 rows)
 
 DROP TABLE ex_msource, ex_mtarget;
 DROP FUNCTION explain_merge(text);
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8..800114dfde 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -948,12 +948,12 @@ SET parallel_setup_cost = 0;
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
          ->  Parallel Append
                ->  GroupAggregate
                      Group Key: pagg_tab_ml.a
@@ -1380,28 +1380,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 6560fe2416..680e82cfc5 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -512,52 +512,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.a, prt1_3.b
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.a, p2_3.b
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index aae5d51e1c..487fe26906 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -132,7 +133,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(21 rows)
+(22 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index e2613d6777..220f0c1629 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 75c78be640..5c4e768825 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1172,6 +1172,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index ab471bdfff..70f311a6f2 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.42.0

Attachment: postgres_fdw.diff (text/plain; charset=UTF-8)
-- join with pseudoconstant quals
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1 AND CURRENT_USER = SESSION_USER) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                           
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                  QUERY PLAN                                                                   
+-----------------------------------------------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.c1, t2.c1, t1.c3
-   ->  Result
+   ->  Sort
          Output: t1.c1, t2.c1, t1.c3
-         One-Time Filter: (CURRENT_USER = SESSION_USER)
-         ->  Foreign Scan
-               Output: t1.c1, t1.c3, t2.c1
-               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
-(9 rows)
+         Sort Key: t1.c3, t1.c1
+         ->  Result
+               Output: t1.c1, t2.c1, t1.c3
+               One-Time Filter: (CURRENT_USER = SESSION_USER)
+               ->  Foreign Scan
+                     Output: t1.c1, t1.c3, t2.c1
+                     Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+                     Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
+(12 rows)
 
 -- non-Var items in targetlist of the nullable rel of a join preventing
 -- push-down in some cases
#95 Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#88)
Re: POC: GROUP BY optimization

On 20/7/2023 18:46, Tomas Vondra wrote:
> On 7/20/23 08:37, Andrey Lepikhov wrote:
>> On 3/10/2022 21:56, Tom Lane wrote:
>>> Revert "Optimize order of GROUP BY keys".
>>>
>>> This reverts commit db0d67db2401eb6238ccc04c6407a4fd4f985832 and
>>> several follow-on fixes.
>>> ...
>>> Since we're hard up against the release deadline for v15, let's
>>> revert these changes for now.  We can always try again later.
>>
>> It may be time to restart the project. As a first step, I rebased the
>> patch on the current master. It wasn't trivial because of some recent
>> optimizations (a29eab, 1349d27 and 8d83a5d).
>> Now, let's repeat the review and rewrite the current patch according to
>> the reasons given in the revert commit.
>
> 1) procost = 1.0 - I guess we could make this more realistic by doing
> some microbenchmarks and tuning the costs for the most expensive cases.

Ok, some thoughts on this part of the task. As I see it, there are not
that many different operators: 26 over fixed-width types and 16 over
variable-width types:

SELECT o.oid,oprcode,typname,typlen FROM pg_operator o
JOIN pg_type t ON (oprleft = t.oid)
WHERE (oprname='<') AND oprleft=oprright AND typlen>0
ORDER BY o.oid;

SELECT o.oid,oprcode,typname,typlen FROM pg_operator o
JOIN pg_type t ON (oprleft = t.oid)
WHERE (oprname='<') AND oprleft=oprright AND typlen<0
ORDER BY o.oid;

A benchmarking procedure for fixed-length types could look something
like this:

CREATE OR REPLACE FUNCTION pass_sort(typ regtype) RETURNS TABLE (
    nrows integer,
    exec_time float
) AS $$
DECLARE
    data json;
    step integer;
BEGIN
    SET work_mem = '1GB';

    FOR step IN 0..3 LOOP
        SELECT pow(100, step)::integer INTO nrows;
        DROP TABLE IF EXISTS test CASCADE;
        EXECUTE format('CREATE TABLE test AS SELECT gs::%s AS x
                        FROM generate_series(1,%s) AS gs;', typ, nrows);

        EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, FORMAT JSON)
        SELECT * FROM test ORDER BY (x) DESC INTO data;
        SELECT data->0->'Execution Time' INTO exec_time;
        RETURN NEXT;
    END LOOP;
END;
$$ LANGUAGE plpgsql VOLATILE;

Execution of SELECT * FROM pass_sort('integer'); shows fairly linear
growth of the execution time. So, using '2.0 * cpu_operator_cost' as the
cost for the integer type (as a baseline), we can derive costs for the
other operators. Variable-width types, I think, would require a more
complex technique to account for the dependency on length.

Does this approach look worthwhile?
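To make the derivation step concrete, here is a small sketch (in Python rather than the thread's SQL/C, with made-up timing numbers — nothing here is part of the patch) of turning per-type pass_sort() timings into suggested procost values, scaled against the integer baseline:

```python
# Hypothetical post-processing of pass_sort() timings: derive a procost
# value for each type's comparison operator, expressed in units of
# cpu_operator_cost and scaled relative to the integer baseline.

def procost_multipliers(timings_ms, baseline="integer", base_cost=2.0):
    """timings_ms maps type name -> sort execution time (ms) for the
    same row count.  The integer type gets base_cost; every other type
    gets base_cost scaled by its time relative to integer."""
    base = timings_ms[baseline]
    return {typ: round(base_cost * t / base, 2)
            for typ, t in timings_ms.items()}

# Illustrative (not measured) timings for sorting the same row count:
timings = {"integer": 100.0, "bigint": 110.0, "numeric": 420.0, "text": 260.0}
print(procost_multipliers(timings))
# prints {'integer': 2.0, 'bigint': 2.2, 'numeric': 8.4, 'text': 5.2}
```

The same arithmetic could of course be done in SQL over the pass_sort() output; the point is only that one measured baseline plus a ratio yields the whole cost table.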

--
regards,
Andrey Lepikhov
Postgres Professional

#96 Andrey Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tomas Vondra (#88)
Re: POC: GROUP BY optimization

On 20/7/2023 18:46, Tomas Vondra wrote:
> 2) estimating quicksort comparisons - This relies on ndistinct
> estimates, and I'm not sure how much more reliable we can make those.
> Probably not much :-( Not sure what to do about this, the only thing I
> can think of is to track "reliability" of the estimates and only do the
> reordering if we have high confidence in the estimates. That means we'll
> miss some optimization opportunities, but it should limit the risk.

On this issue, I see two options:
1. Go through the grouping column list and find the most reliable one.
If we have a unique column, or a column whose number of distinct values
(per the statistics) is significantly higher than the ndistinct of the
other grouping columns, we can place that column first in the grouping.
That should guarantee the reliability of such a decision, shouldn't it?
2. If we have extended statistics on distinct values, and these
statistics cover some set of the first columns in the grouping list, we
can optimize those positions. That also looks reliable.

Any thoughts?
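Option 1 can be sketched like this (Python pseudocode standing in for the planner's C; the column tuples and the threshold constant are purely illustrative assumptions, not anything in the patch):

```python
# Sketch of option 1: promote a grouping column to the front of the
# GROUP BY list only when the decision looks reliable, i.e. the column
# is unique or its ndistinct clearly dominates all other columns.

RELIABILITY_FACTOR = 10.0  # assumed dominance threshold, for illustration

def reorder_group_by(columns):
    """columns: list of (name, ndistinct, is_unique) in query order.

    If some column is unique, or its ndistinct estimate exceeds every
    other column's by RELIABILITY_FACTOR, move it first; otherwise keep
    the user-specified order (no reliable win, so don't reorder).
    """
    for i, (name, nd, unique) in enumerate(columns):
        others = [c[1] for j, c in enumerate(columns) if j != i]
        if unique or (others and nd >= RELIABILITY_FACTOR * max(others)):
            return [columns[i]] + columns[:i] + columns[i + 1:]
    return list(columns)

cols = [("v", 2, False), ("p", 50000, False), ("c", 2500, False)]
print([c[0] for c in reorder_group_by(cols)])  # prints ['p', 'v', 'c']
```

The conservative fallback (keep the query order when nothing dominates) is what limits the risk Tomas described: a misestimate can then only cost us a missed optimization, not a regression.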

--
regards,
Andrey Lepikhov
Postgres Professional

#97 Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Andrey Lepikhov (#96)
1 attachment(s)
Re: POC: GROUP BY optimization

New version of the patch. Fixed minor inconsistencies and rebased onto
current master.

--
regards,
Andrey Lepikhov
Postgres Professional

Attachments:

Attachment: 0001-Explore-alternative-orderings-of-group-by-pathkeys-d-20231001.patch (text/plain; charset=UTF-8)
From 2f5a42c8a53286f830e3376ff4d3f8b7d4215b4b Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 13 Sep 2023 11:20:03 +0700
Subject: [PATCH] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may depend heavily on the order in which the keys are compared when
building the groups. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

In principle, we might generate grouping paths for all permutations of the keys
and leave the rest to the optimizer. But that might get very expensive, so we
try to pick only a couple interesting orderings based on both local and global
information.

When planning the grouping path, we explore statistics (number of distinct
values, cost of the comparison function) for the keys and reorder them
to minimize comparison costs. Intuitively, it may be better to perform more
expensive comparisons (for complex data types, etc.) last because maybe
the cheaper comparisons will be enough. Similarly, the higher the cardinality
of a key, the lower the probability we'll need to compare more keys. The patch
generates and costs various orderings, picking the cheapest ones.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch generates orderings and picks those, minimizing the comparison cost
(for various path keys), and then adds orderings that might be useful for
operations higher up in the plan (ORDER BY, etc.). Finally, it always keeps
the ordering specified in the query, assuming the user might have additional
insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  36 +-
 src/backend/optimizer/path/costsize.c         | 363 ++++++++++-
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 600 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 465 ++++++++------
 src/backend/optimizer/util/pathnode.c         |   4 +-
 src/backend/utils/adt/selfuncs.c              |  38 +-
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/cost.h                  |   4 +-
 src/include/optimizer/paths.h                 |   7 +
 src/include/utils/selfuncs.h                  |   5 +
 src/test/regress/expected/aggregates.out      | 244 ++++++-
 .../regress/expected/incremental_sort.out     |   2 +-
 src/test/regress/expected/join.out            |  51 +-
 src/test/regress/expected/merge.out           |  15 +-
 .../regress/expected/partition_aggregate.out  |  44 +-
 src/test/regress/expected/partition_join.out  |  75 +--
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/expected/union.out           |  60 +-
 src/test/regress/sql/aggregates.sql           |  99 +++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 23 files changed, 1769 insertions(+), 382 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 144c114d0f..63af7feabe 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2319,18 +2319,21 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
 -- join with pseudoconstant quals
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1 AND CURRENT_USER = SESSION_USER) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                                                                           QUERY PLAN                                                                                           
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                  QUERY PLAN                                                                   
+-----------------------------------------------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.c1, t2.c1, t1.c3
-   ->  Result
+   ->  Sort
          Output: t1.c1, t2.c1, t1.c3
-         One-Time Filter: (CURRENT_USER = SESSION_USER)
-         ->  Foreign Scan
-               Output: t1.c1, t1.c3, t2.c1
-               Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
-               Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST
-(9 rows)
+         Sort Key: t1.c3, t1.c1
+         ->  Result
+               Output: t1.c1, t2.c1, t1.c3
+               One-Time Filter: (CURRENT_USER = SESSION_USER)
+               ->  Foreign Scan
+                     Output: t1.c1, t1.c3, t2.c1
+                     Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
+                     Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1"))))
+(12 rows)
 
 -- non-Var items in targetlist of the nullable rel of a join preventing
 -- push-down in some cases
@@ -9609,13 +9612,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J
 -- left outer join + nullable clause
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
-                                                                                                                     QUERY PLAN                                                                                                                     
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                    QUERY PLAN                                                                                     
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
    Output: t1.a, fprt2.b, fprt2.c
-   Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
-   Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
-(4 rows)
+   Sort Key: t1.a, fprt2.b, fprt2.c
+   ->  Foreign Scan
+         Output: t1.a, fprt2.b, fprt2.c
+         Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
+         Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))
+(7 rows)
 
 SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
  a | b |  c   
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index d6ceafd51c..6f11f679ba 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1816,6 +1816,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
 									rterm->pathtarget->width);
 }
 
+/*
+ * is_fake_var
+ *		Workaround for generate_append_tlist(), which generates fake Vars with
+ *		varno == 0 that would cause estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
+
+/*
+ * get_width_cost_multiplier
+ *		Returns the relative complexity of comparing two values based on their
+ * width. The idea is that the comparison becomes more expensive the longer
+ * the values are. The return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	double		width = -1.0;	/* fake value */
+
+	if (IsA(expr, RelabelType))
+		expr = (Expr *) ((RelabelType *) expr)->arg;
+
+	/* Try to find actual stat in corresponding relation */
+	if (IsA(expr, Var))
+	{
+		Var		   *var = (Var *) expr;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+		{
+			RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+			if (rel != NULL &&
+				var->varattno >= rel->min_attr &&
+				var->varattno <= rel->max_attr)
+			{
+				int			ndx = var->varattno - rel->min_attr;
+
+				if (rel->attr_widths[ndx] > 0)
+					width = rel->attr_widths[ndx];
+			}
+		}
+	}
+
+	/* Didn't find any actual stats, try using type width instead. */
+	if (width < 0.0)
+	{
+		Node	   *node = (Node *) expr;
+
+		width = get_typavgwidth(exprType(node), exprTypmod(node));
+	}
+
+	/*
+	 * Values are passed as Datum type, so comparisons can't be cheaper than
+	 * comparing a Datum value.
+	 *
+	 * FIXME I find this reasoning questionable. We may pass int2, and
+	 * comparing it is probably a bit cheaper than comparing a bigint.
+	 */
+	if (width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * We consider the cost of a comparison not to be directly proportional to
+	 * width of the argument, because widths of the arguments could be
+	 * slightly different (we only know the average width for the whole
+	 * column). So we use log16(width) as an estimate.
+	 */
+	return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same, because we have no estimate of the group
+ * sizes, only an estimate of the number of groups (distinct values). In that
+ * case, the formula becomes
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparison cost for column k; then the total cost is
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, we fall back to a
+ * simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell   *lc;
+	List	   *pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	List	   *cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * If we use a bounded heap-sort keeping just K tuples in memory, the
+		 * total number of tuple comparisons is N log2 K; but the constant
+		 * factor is a bit higher than for quicksort. Tweak it so that the
+		 * cost curve is continuous at the crossover point.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account the per-column
+	 * comparison function cost.  We try to compute the needed number of
+	 * comparisons per column.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceMember *em;
+		double		nGroups,
+					correctedNGroups;
+		Cost		funcCost = 1.0;
+
+		/*
+		 * We believe that equivalence members aren't very different, so, to
+		 * estimate cost we consider just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* do not lookup funcCost if the data type is the same */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+				/*
+				 * add_function_cost returns the product of cpu_operator_cost
+				 * and procost, but we need just procost, so undo that.
+				 */
+				funcCost = cost.per_tuple / cpu_operator_cost;
+
+				prev_datatype = em->em_datatype;
+			}
+		}
+
+		/* factor in the width of the values in this column */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		/* now we have per-key cost, so add to the running total */
+		totalFuncCost += funcCost;
+
+		/* remember if we have found a fake Var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * We need to calculate the number of comparisons for this column,
+		 * which requires knowing the group size. So we estimate the number of
+		 * groups by calling estimate_num_groups_incremental(), which
+		 * estimates the group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars, so
+		 * fall back to a default estimate in that case.
+		 */
+		if (!has_fake_var)
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+
+			/*
+			 * Use the geometric mean as the estimate if there are no stats.
+			 *
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys are not considered in the cost above, but we still
+		 * do have to compare them in the qsort comparator. So make sure to
+		 * factor in the cost in that case.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				/*
+				 * have to keep at least one group, and a multiple of group
+				 * size
+				 */
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Once we get down to single-row groups, the tuples in each group are
+		 * unique and we can skip all remaining columns.
+		 */
+		if (tuplesPerPrevGroup <= nGroups)
+			break;
+
+		/*
+		 * Uniform distributions with all groups being of the same size are
+		 * the best case, with nice smooth behavior. Real-world distributions
+		 * tend not to be uniform, though, and we don't have any reliable
+		 * easy-to-use information. As a basic defense against skewed
+		 * distributions, we use a 1.5 factor to make the expected group a bit
+		 * larger, but we need to be careful not to make the group larger than
+		 * in the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles
+	 * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort
+	 * estimation formula has an additional term proportional to the number of
+	 * tuples (see Chapter 8.2 and Theorem 4.1). That affects cases with a low
+	 * number of tuples, approximately fewer than 1e4. We could implement it
+	 * as an additional multiplier under the logarithm, but our formula is
+	 * already more complex (it takes the number of unique tuples into
+	 * account), and it's not clear how to combine the multiplier with the
+	 * number of groups. So just estimate it as 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/*
+ * Simple wrapper to estimate the sort cost for a given pathkey ordering.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								 0, tuples, tuples, false);
+}
+
 /*
  * cost_tuplesort
  *	  Determines and returns the cost of sorting a relation using tuplesort,
@@ -1832,7 +2153,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * number of initial runs formed and M is the merge order used by tuplesort.c.
  * Since the average initial run should be about sort_mem, we have
  *		disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- *		cpu = comparison_cost * t * log2(t)
+ *		while the CPU cost is computed by compute_cpu_sort_cost().
  *
  * If the sort is bounded (i.e., only the first k result tuples are needed)
  * and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1851,9 +2172,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
  * 'comparison_cost' is the extra cost per comparison, if any
  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; any "input cost" should be
+ * added by the caller afterwards
  */
 static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
 			   double tuples, int width,
 			   Cost comparison_cost, int sort_mem,
 			   double limit_tuples)
@@ -1870,9 +2193,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	/* Include the default cost-per-comparison */
-	comparison_cost += 2.0 * cpu_operator_cost;
-
 	/* Do we have a useful LIMIT? */
 	if (limit_tuples > 0 && limit_tuples < tuples)
 	{
@@ -1896,12 +2216,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 		double		log_runs;
 		double		npageaccesses;
 
-		/*
-		 * CPU costs
-		 *
-		 * Assume about N log2 N comparisons
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		/* CPU costs */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 
 		/* Disk costs */
 
@@ -1917,18 +2235,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
 	}
 	else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
 	{
-		/*
-		 * We'll use a bounded heap-sort keeping just K tuples in memory, for
-		 * a total number of tuple comparisons of N log2 K; but the constant
-		 * factor is a bit higher than for quicksort.  Tweak it so that the
-		 * cost curve is continuous at the crossover point.
-		 */
-		*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+		/* We'll use a bounded heap-sort keeping just K tuples in memory. */
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  output_tuples, true);
 	}
 	else
 	{
 		/* We'll use plain quicksort on all the input tuples */
-		*startup_cost = comparison_cost * tuples * LOG2(tuples);
+		*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+											  comparison_cost, tuples,
+											  tuples, false);
 	}
 
 	/*
@@ -2042,7 +2359,7 @@ cost_incremental_sort(Path *path,
 	 * Estimate the average cost of sorting of one group where presorted keys
 	 * are equal.
 	 */
-	cost_tuplesort(&group_startup_cost, &group_run_cost,
+	cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
 				   group_tuples, width, comparison_cost, sort_mem,
 				   limit_tuples);
 
@@ -2102,7 +2419,7 @@ cost_sort(Path *path, PlannerInfo *root,
 	Cost		startup_cost;
 	Cost		run_cost;
 
-	cost_tuplesort(&startup_cost, &run_cost,
+	cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
 				   tuples, width,
 				   comparison_cost, sort_mem,
 				   limit_tuples);
@@ -2200,7 +2517,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2268,7 +2585,7 @@ cost_append(AppendPath *apath)
 					 * any child.
 					 */
 					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
+							  root,
 							  pathkeys,
 							  subpath->total_cost,
 							  subpath->rows,
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e2..07edd4f38e 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index fdb60aaa8d..ac3e13ddd4 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,17 +17,24 @@
  */
 #include "postgres.h"
 
+#include <float.h>
+
+#include "miscadmin.h"
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +357,546 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the pathkeys of the input path.
+ *
+ * The function returns new lists (pathkeys and clauses); the original GROUP
+ * BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way that allows simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* abort on first mismatch */
+			break;
+
+		/* find the GROUP BY clause matching this pathkey */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * Used to generate all permutations of a pathkey list.
+ */
+typedef struct PathkeyMutatorState
+{
+	List	   *elemsList;
+	ListCell  **elemCells;
+	void	  **elems;
+	int		   *positions;
+	int			mutatorNColumns;
+	int			count;
+} PathkeyMutatorState;
+
+
+/*
+ * PathkeyMutatorInit
+ *		Initialize state of the permutation generator.
+ *
+ * We want to generate permutations of elements in the "elems" list. We may want
+ * to skip some number of elements at the beginning (treated as presorted)
+ * or at the end (we only permute a limited number of group keys).
+ *
+ * The list is decomposed into elements, and we also keep pointers to individual
+ * cells. This allows us to build the permuted list quickly and cheaply, without
+ * creating any copies.
+ */
+static void
+PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
+{
+	int			i;
+	int			n = end - start;
+	ListCell   *lc;
+
+	memset(state, 0, sizeof(*state));
+
+	state->mutatorNColumns = n;
+
+	state->elemsList = list_copy(elems);
+
+	state->elems = palloc(sizeof(void *) * n);
+	state->elemCells = palloc(sizeof(ListCell *) * n);
+	state->positions = palloc(sizeof(int) * n);
+
+	i = 0;
+	for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
+	{
+		state->elemCells[i] = lc;
+		state->elems[i] = lfirst(lc);
+		state->positions[i] = i + 1;
+		i++;
+
+		if (i >= n)
+			break;
+	}
+}
+
+/* Swap two elements of an array. */
+static void
+PathkeyMutatorSwap(int *a, int i, int j)
+{
+	int			s = a[i];
+
+	a[i] = a[j];
+	a[j] = s;
+}
+
+/*
+ * Generate the next permutation of elements.
+ */
+static bool
+PathkeyMutatorNextSet(int *a, int n)
+{
+	int			j,
+				k,
+				l,
+				r;
+
+	j = n - 2;
+
+	while (j >= 0 && a[j] >= a[j + 1])
+		j--;
+
+	if (j < 0)
+		return false;
+
+	k = n - 1;
+
+	while (k > 0 && a[j] >= a[k])
+		k--;
+
+	PathkeyMutatorSwap(a, j, k);
+
+	l = j + 1;
+	r = n - 1;
+
+	while (l < r)
+		PathkeyMutatorSwap(a, l++, r--);
+
+	return true;
+}
+
+/*
+ * PathkeyMutatorNext
+ *		Generate the next permutation of list of elements.
+ *
+ * Returns the next permutation (as a list of elements) or NIL if there are no
+ * more permutations.
+ */
+static List *
+PathkeyMutatorNext(PathkeyMutatorState *state)
+{
+	int			i;
+
+	state->count++;
+
+	/* first permutation is original list */
+	if (state->count == 1)
+		return state->elemsList;
+
+	/* when there are no more permutations, return NIL */
+	if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
+	{
+		pfree(state->elems);
+		pfree(state->elemCells);
+		pfree(state->positions);
+
+		list_free(state->elemsList);
+
+		return NIL;
+	}
+
+	/* update the list cells to point to the right elements */
+	for (i = 0; i < state->mutatorNColumns; i++)
+		lfirst(state->elemCells[i]) =
+			(void *) state->elems[state->positions[i] - 1];
+
+	return state->elemsList;
+}
+
+/*
+ * Cost of comparing pathkeys.
+ */
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey    *pathkey;
+} PathkeySortCost;
+
+static int
+pathkey_sort_cost_comparator(const void *_a, const void *_b)
+{
+	const PathkeySortCost *a = (PathkeySortCost *) _a;
+	const PathkeySortCost *b = (PathkeySortCost *) _b;
+
+	if (a->cost < b->cost)
+		return -1;
+	else if (a->cost == b->cost)
+		return 0;
+	return 1;
+}
+
+/*
+ * get_cheapest_group_keys_order
+ *		Reorders the group pathkeys / clauses to minimize the comparison cost.
+ *
+ * Given the list of pathkeys in '*group_pathkeys', we try to arrange these
+ * in an order that minimizes the sort costs that will be incurred by the
+ * GROUP BY.  The costs mainly depend on the cost of the sort comparator
+ * function(s) and the number of distinct values in each column of the GROUP
+ * BY clause (*group_clauses).  Sorting on subsequent columns is only required
+ * for tiebreak situations where two values sort equally.
+ *
+ * In case the input is partially sorted, only the remaining pathkeys are
+ * considered.  'n_preordered' denotes how many of the leading *group_pathkeys
+ * the input is presorted by.
+ *
+ * Returns true and sets *group_pathkeys and *group_clauses to the newly
+ * ordered versions of the lists that were passed in via these parameters.
+ * If no reordering was deemed necessary then we return false, in which case
+ * the *group_pathkeys and *group_clauses lists are left untouched. The
+ * original *group_pathkeys and *group_clauses parameter values are never
+ * destructively modified in place.
+ */
+static bool
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+							  List **group_pathkeys, List **group_clauses,
+							  int n_preordered)
+{
+	List				   *new_group_pathkeys = NIL;
+	List				   *new_group_clauses = NIL;
+	List				   *var_group_pathkeys;
+	ListCell			   *cell;
+	PathkeyMutatorState		mstate;
+	double					cheapest_sort_cost = DBL_MAX;
+	int						nFreeKeys;
+	int						nToPermute;
+
+	/* If there are fewer than 2 unsorted pathkeys, we're done. */
+	if (root->num_groupby_pathkeys - n_preordered < 2)
+		return false;
+
+	/*
+	 * We could exhaustively cost all possible orderings of the pathkeys, but
+	 * for a large number of pathkeys it might be prohibitively expensive. So
+	 * we try to apply simple cheap heuristics first - we sort the pathkeys by
+	 * sort cost (as if the pathkey was sorted independently) and then check
+	 * only the four cheapest pathkeys. The remaining pathkeys are kept
+	 * ordered by cost.
+	 *
+	 * XXX This is a very simple heuristic, but likely to work fine for most
+	 * cases (because the number of GROUP BY clauses tends to be lower than
+	 * 4). But it ignores how the number of distinct values in each pathkey
+	 * affects the following steps. It might be better to use a "more
+	 * expensive" pathkey first if it has many distinct values, because that
+	 * limits the number of comparisons for the remaining pathkeys. But
+	 * evaluating that is likely quite expensive.
+	 */
+	nFreeKeys = root->num_groupby_pathkeys - n_preordered;
+	nToPermute = 4;
+	if (nFreeKeys > nToPermute)
+	{
+		PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
+		PathkeySortCost *cost = costs;
+		int i;
+
+		/*
+		 * Estimate cost for sorting individual pathkeys skipping the
+		 * pre-ordered pathkeys.
+		 */
+		for_each_from(cell, *group_pathkeys, n_preordered)
+		{
+			PathKey    *pathkey = (PathKey *) lfirst(cell);
+			List	   *to_cost;
+
+			if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+				break;
+
+			to_cost = list_make1(pathkey);
+			cost->pathkey = pathkey;
+			cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
+			cost++;
+
+			list_free(to_cost);
+		}
+
+		/* sort the pathkeys by sort cost in ascending order */
+		qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
+
+		/*
+		 * Rebuild the list of pathkeys - first the preordered ones, then the
+		 * rest ordered by cost.
+		 */
+		new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
+
+		for (i = 0; i < nFreeKeys; i++)
+			new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
+
+		/* append any pathkeys beyond the GROUP BY ones */
+		new_group_pathkeys = list_concat(new_group_pathkeys,
+										 list_copy_tail(*group_pathkeys,
+														root->num_groupby_pathkeys));
+		pfree(costs);
+	}
+	else
+	{
+		/* Copy the list, so that we can free the new list by list_free. */
+		new_group_pathkeys = list_copy(*group_pathkeys);
+		nToPermute = nFreeKeys;
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
+
+	/*
+	 * Generate pathkey lists with permutations of the first nToPermute
+	 * pathkeys.
+	 *
+	 * XXX We simply calculate sort cost for each individual pathkey list, but
+	 * there's room for two dynamic programming optimizations here. Firstly,
+	 * we may pass the current "best" cost to cost_sort_estimate so that it
+	 * can "abort" if the estimated pathkeys list exceeds it. Secondly, it
+	 * could return the position at which the cost was exceeded, and we could
+	 * skip all permutations with the same prefix.
+	 *
+	 * Imagine we've already found ordering with cost C1, and we're evaluating
+	 * another ordering - cost_sort_estimate() calculates cost by adding the
+	 * pathkeys one by one (more or less), and the cost only grows. If at any
+	 * point it exceeds C1, it can't possibly be "better" so we can discard
+	 * it. But we also know that we can discard all ordering with the same
+	 * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
+	 * then the same thing will happen for any ordering with this prefix.
+	 */
+	PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered,
+					   n_preordered + nToPermute);
+
+	while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
+	{
+		Cost		cost;
+
+		cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
+
+		if (cost < cheapest_sort_cost)
+		{
+			list_free(new_group_pathkeys);
+			new_group_pathkeys = list_copy(var_group_pathkeys);
+			cheapest_sort_cost = cost;
+		}
+	}
+
+	Assert(list_length(new_group_pathkeys) == list_length(root->group_pathkeys));
+
+	/* Reorder the group clauses according to the reordered pathkeys. */
+	foreach(cell, new_group_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(cell);
+
+		if (foreach_current_index(cell) >= root->num_groupby_pathkeys)
+			break;
+
+		new_group_clauses =
+			lappend(new_group_clauses,
+					get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+											*group_clauses));
+	}
+
+	/* Append the remaining GROUP BY clauses */
+	new_group_clauses = list_concat_unique_ptr(new_group_clauses,
+											   *group_clauses);
+	*group_pathkeys = new_group_pathkeys;
+	*group_clauses = new_group_clauses;
+	Assert(list_length(new_group_clauses) == list_length(root->processed_groupClause));
+
+	return true;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match the target ORDER BY clause (as much as
+ *   possible), with the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+	int			n_preordered = 0;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost, ignoring both the
+	 * target ordering (ORDER BY) and ordering of the input path.
+	 */
+	if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered))
+	{
+		Assert(list_length(pathkeys) == list_length(clauses));
+
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 *
+	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
+	 * info in this case.
+	 */
+	if (path_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * Reorder the tail to minimize sort cost.
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the path's ordering */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time matching
+	 * the target ORDER BY clause as much as possible).
+	 */
+	if (root->sort_pathkeys)
+	{
+		n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+													  &pathkeys,
+													  &clauses,
+													  root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, in
+		 * which case get_cheapest_group_keys_order returns false. But we
+		 * still want to keep the keys reordered to sort_pathkeys.
+		 */
+		get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
+									  n_preordered);
+
+		/* keep the group keys reordered to match the ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2486,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * The group pathkeys can be reordered to benefit from the path's ordering.
+ * The ordering may not be "complete" and may require an incremental sort,
+ * but that's fine. So we simply count the prefix of pathkeys with a matching
+ * group key, and stop at the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines that
+ * the (a,b) pathkeys are useful for grouping, and we might do an incremental
+ * sort to get a path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly and that would otherwise be truncated away.
+ *
+ * Returns the length of the pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2548,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2586,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index aafef24cf1..4727a5a157 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2811,8 +2811,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -3469,6 +3470,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   tlist,
 												   true,
 												   &sortable);
+
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -6843,90 +6845,108 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6938,69 +6958,84 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (!is_sorted)
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
 				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+					bool			is_sorted;
+					int				presorted_keys;
+					PathKeyInfo    *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+					is_sorted = pathkeys_count_contained_in(info->pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+					if (!is_sorted)
+					{
+						/*
+						 * Try at least sorting the cheapest path and also try
+						 * incrementally sorting any path which is partially
+						 * sorted already (no need to deal with paths which have
+						 * presorted keys when incremental sort is disabled unless
+						 * it's the cheapest input path).
+						 */
+						if (path != partially_grouped_rel->cheapest_total_path &&
+							(presorted_keys == 0 || !enable_incremental_sort))
+							continue;
+
+						/*
+						 * We've no need to consider both a sort and incremental
+						 * sort.  We'll just do a sort if there are no pre-sorted
+						 * keys and an incremental sort when there are presorted
+						 * keys.
+						 */
+						if (presorted_keys == 0 || !enable_incremental_sort)
+							path = (Path *) create_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 -1.0);
+						else
+							path = (Path *) create_incremental_sort_path(root,
+																		 grouped_rel,
+																		 path,
+																		 info->pathkeys,
+																		 presorted_keys,
+																		 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7203,66 +7238,85 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_total_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7271,67 +7325,86 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool		is_sorted;
+				int			presorted_keys;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_partial_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7445,6 +7518,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 211ba65389..7b8d0cd294 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root,
 			pathnode->path.total_cost = child->total_cost;
 		}
 		else
-			cost_append(pathnode);
+			cost_append(pathnode, root);
 		/* Must do this last, else cost_append complains */
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c4fcd0076e..cb7e67099e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3386,11 +3386,28 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ *		An estimate_num_groups variant, optimized for cases that add the
+ *		expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+								double input_rows,
+								List **pgset, EstimationInfo *estinfo,
+								List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3421,7 +3438,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3430,6 +3447,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* already processed on a previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++;			/* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3499,7 +3524,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3516,6 +3545,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 16ec6c5ef0..9701d6c70f 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1027,6 +1027,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d08d55c3fe..0fcd023c38 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -391,6 +391,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 5702fbba60..15ba2f8218 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index bee090ffc2..bbb1132473 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -116,7 +116,9 @@ extern void cost_incremental_sort(Path *path,
 								  Cost input_startup_cost, Cost input_total_cost,
 								  double input_tuples, int width, Cost comparison_cost, int sort_mem,
 								  double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+							   int nPresortedKeys, double tuples);
+extern void cost_append(AppendPath *path, PlannerInfo *root);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 							  List *pathkeys, int n_streams,
 							  Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 50bc3b503a..e4ff16d0c4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -205,6 +206,12 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int	group_keys_reorder_by_pathkeys(List *pathkeys,
+										   List **group_pathkeys,
+										   List **group_clauses,
+										   int num_groupby_pathkeys);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 2f76c473db..2b21c06cc9 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows, List **pgset,
 								  EstimationInfo *estinfo);
 
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+											  double input_rows, List **pgset,
+											  EstimationInfo *estinfo,
+											  List **cache_varinfos, int prevNExprs);
+
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index d8271da4d1..205be28b3d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1234,7 +1234,8 @@ explain (costs off)
   select distinct min(f1), max(f1) from minmaxtest;
                                          QUERY PLAN                                          
 ---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+   Group Key: $0, $1
    InitPlan 1 (returns $0)
      ->  Limit
            ->  Merge Append
@@ -1257,10 +1258,8 @@ explain (costs off)
                  ->  Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
                        Index Cond: (f1 IS NOT NULL)
                  ->  Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-   ->  Sort
-         Sort Key: ($0), ($1)
-         ->  Result
-(26 rows)
+   ->  Result
+(25 rows)
 
 select distinct min(f1), max(f1) from minmaxtest;
  min | max 
@@ -2715,6 +2714,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Sort
+         Sort Key: p, c, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, d, v, c
+   ->  Sort
+         Sort Key: p, d, v, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v
+   ->  Incremental Sort
+         Sort Key: p, c, v
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, c, v, d
+   ->  Incremental Sort
+         Sort Key: p, c, v, d
+         Presorted Key: p
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 7fdb685313..b8a0968b06 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1426,7 +1426,7 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9b8638f286..27684bfce2 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
 ------+----+----
  bb   | 12 | 13
  cc   | 22 | 23
- dd   |    | 33
  ee   | 42 |   
+ dd   |    | 33
 (4 rows)
 
 -- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
 ------+------+------+------+------
  bb   |   12 |    2 |   13 |    3
  cc   |   22 |    2 |   23 |    3
- dd   |      |      |   33 |    3
  ee   |   42 |    2 |      |     
+ dd   |      |      |   33 |    3
 (4 rows)
 
 SELECT * FROM
@@ -5645,18 +5645,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
 
 -- join removal is not possible here
 explain (costs off)
@@ -7597,44 +7599,39 @@ select * from j1 natural join j2;
 explain (verbose, costs off)
 select * from j1
 inner join (select distinct id from j3) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Unique
+   ->  HashAggregate
          Output: j3.id
-         ->  Sort
+         Group Key: j3.id
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(13 rows)
+(11 rows)
 
 -- ensure group by clause allows the inner to become unique
 explain (verbose, costs off)
 select * from j1
 inner join (select id from j3 group by id) j3 on j1.id = j3.id;
-               QUERY PLAN                
------------------------------------------
+            QUERY PLAN             
+-----------------------------------
  Nested Loop
    Output: j1.id, j3.id
    Inner Unique: true
    Join Filter: (j1.id = j3.id)
-   ->  Group
+   ->  HashAggregate
          Output: j3.id
          Group Key: j3.id
-         ->  Sort
+         ->  Seq Scan on public.j3
                Output: j3.id
-               Sort Key: j3.id
-               ->  Seq Scan on public.j3
-                     Output: j3.id
    ->  Seq Scan on public.j1
          Output: j1.id
-(14 rows)
+(11 rows)
 
 drop table j1;
 drop table j2;
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index 28a6d0ba98..51a4596e9f 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1459,18 +1459,15 @@ WHEN MATCHED AND t.a < 10 THEN
                            explain_merge                            
 --------------------------------------------------------------------
  Merge on ex_mtarget t (actual rows=0 loops=1)
-   ->  Merge Join (actual rows=0 loops=1)
-         Merge Cond: (t.a = s.a)
-         ->  Sort (actual rows=0 loops=1)
-               Sort Key: t.a
-               Sort Method: quicksort  Memory: xxx
+   ->  Hash Join (actual rows=0 loops=1)
+         Hash Cond: (s.a = t.a)
+         ->  Seq Scan on ex_msource s (actual rows=1 loops=1)
+         ->  Hash (actual rows=0 loops=1)
+               Buckets: xxx  Batches: xxx  Memory Usage: xxx
                ->  Seq Scan on ex_mtarget t (actual rows=0 loops=1)
                      Filter: (a < '-1000'::integer)
                      Rows Removed by Filter: 54
-         ->  Sort (never executed)
-               Sort Key: s.a
-               ->  Seq Scan on ex_msource s (never executed)
-(12 rows)
+(9 rows)
 
 DROP TABLE ex_msource, ex_mtarget;
 DROP FUNCTION explain_merge(text);
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8..800114dfde 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -948,12 +948,12 @@ SET parallel_setup_cost = 0;
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Gather
-         Workers Planned: 2
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
          ->  Parallel Append
                ->  GroupAggregate
                      Group Key: pagg_tab_ml.a
@@ -1380,28 +1380,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
 -- When GROUP BY clause does not match; partial aggregation is performed for each partition.
 EXPLAIN (COSTS OFF)
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Sort
    Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-   ->  Finalize GroupAggregate
+   ->  Finalize HashAggregate
          Group Key: pagg_tab_para.y
          Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-         ->  Gather Merge
+         ->  Gather
                Workers Planned: 2
-               ->  Sort
-                     Sort Key: pagg_tab_para.y
-                     ->  Parallel Append
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_1.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-                           ->  Partial HashAggregate
-                                 Group Key: pagg_tab_para_2.y
-                                 ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+               ->  Parallel Append
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_1.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+                     ->  Partial HashAggregate
+                           Group Key: pagg_tab_para_2.y
+                           ->  Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
 
 SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
  y  |  sum  |         avg         | count 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 6560fe2416..680e82cfc5 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -512,52 +512,41 @@ EXPLAIN (COSTS OFF)
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
   GROUP BY 1, 2 ORDER BY 1, 2;
-                                                   QUERY PLAN                                                    
------------------------------------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
  Group
    Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-   ->  Merge Append
+   ->  Sort
          Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-         ->  Group
-               Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
-                           Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1.a, prt1.b
-                                 ->  Seq Scan on prt1_p1 prt1
-                           ->  Sort
-                                 Sort Key: p2.a, p2.b
-                                 ->  Seq Scan on prt2_p1 p2
-         ->  Group
-               Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
-                           Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_1.a, prt1_1.b
-                                 ->  Seq Scan on prt1_p2 prt1_1
-                           ->  Sort
-                                 Sort Key: p2_1.a, p2_1.b
-                                 ->  Seq Scan on prt2_p2 p2_1
-         ->  Group
-               Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-               ->  Sort
-                     Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-                     ->  Merge Full Join
-                           Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
-                           Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-                           ->  Sort
-                                 Sort Key: prt1_2.a, prt1_2.b
-                                 ->  Seq Scan on prt1_p3 prt1_2
-                           ->  Sort
-                                 Sort Key: p2_2.a, p2_2.b
-                                 ->  Seq Scan on prt2_p3 p2_2
-(43 rows)
+         ->  Append
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_1.a, prt1_1.b
+                           ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Sort
+                           Sort Key: p2_1.a, p2_1.b
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_2.a, prt1_2.b
+                           ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Sort
+                           Sort Key: p2_2.a, p2_2.b
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Merge Full Join
+                     Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+                     Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+                     ->  Sort
+                           Sort Key: prt1_3.a, prt1_3.b
+                           ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Sort
+                           Sort Key: p2_3.a, p2_3.b
+                           ->  Seq Scan on prt2_p3 p2_3
+(32 rows)
 
 SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
   WHERE a BETWEEN 490 AND 510
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index aae5d51e1c..487fe26906 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -132,7 +133,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(21 rows)
+(22 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index e2613d6777..220f0c1629 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1303,24 +1303,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where q2 = q2;
-                        QUERY PLAN                        
-----------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: (q2 IS NOT NULL)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: (q2 IS NOT NULL)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: (q2 IS NOT NULL)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: (q2 IS NOT NULL)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
@@ -1339,24 +1337,22 @@ select distinct q1 from
    union all
    select distinct * from int8_tbl i82) ss
 where -q1 = q2;
-                       QUERY PLAN                       
---------------------------------------------------------
- Unique
-   ->  Merge Append
-         Sort Key: "*SELECT* 1".q1
+                    QUERY PLAN                    
+--------------------------------------------------
+ HashAggregate
+   Group Key: "*SELECT* 1".q1
+   ->  Append
          ->  Subquery Scan on "*SELECT* 1"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i81.q1, i81.q2
-                           ->  Seq Scan on int8_tbl i81
-                                 Filter: ((- q1) = q2)
+               ->  HashAggregate
+                     Group Key: i81.q1, i81.q2
+                     ->  Seq Scan on int8_tbl i81
+                           Filter: ((- q1) = q2)
          ->  Subquery Scan on "*SELECT* 2"
-               ->  Unique
-                     ->  Sort
-                           Sort Key: i82.q1, i82.q2
-                           ->  Seq Scan on int8_tbl i82
-                                 Filter: ((- q1) = q2)
-(15 rows)
+               ->  HashAggregate
+                     Group Key: i82.q1, i82.q2
+                     ->  Seq Scan on int8_tbl i82
+                           Filter: ((- q1) = q2)
+(13 rows)
 
 select distinct q1 from
   (select distinct * from int8_tbl i81
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 75c78be640..5c4e768825 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1172,6 +1172,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index ab471bdfff..70f311a6f2 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.42.0

#98Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#97)
Re: POC: GROUP BY optimization

Hi!

On Sun, Oct 1, 2023 at 11:45 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

New version of the patch. Fixed minor inconsistencies and rebased onto
current master.

Thank you (and other authors) for working on this subject. Indeed,
GROUP BY clauses are order-agnostic. Reordering them into the most
suitable order could give significant query performance benefits. I
went through the thread: I see significant work has been already made
on this patch, the code is quite polished.

I'd like to make some notes.

1) As already mentioned, there is clearly a repetitive pattern for the
code following after get_useful_group_keys_orderings() calls. I think
it would be good to extract it into a separate function. Please, do
this as a separate patch coming before the group-by patch. That would
simplify the review.

2) I wonder what planning overhead this patch could introduce? Could
you try to measure the worst case? What if we have a table with a lot
of indexes and a long list of group-by clauses partially matching
every index. This should give us an understanding of whether we need
a separate GUC to control this feature.

3) I see that get_useful_group_keys_orderings() makes 3 calls to
get_cheapest_group_keys_order() function. Each time
get_cheapest_group_keys_order() performs the cost estimate and
reorders the free keys. However, cost estimation implies the system
catalog lookups (that is quite expensive). I wonder if we could
change the algorithm. Could we just sort the group-by keys by cost
once, save this ordering and then just re-use it. So, every time we
need to reorder a group by, we can just pull the required keys to the
top and use saved ordering for the rest. I also wonder if we could do
this once for add_paths_to_grouping_rel() and
create_partial_grouping_paths() calls. So, it probably should be
somewhere in create_ordinary_grouping_paths().
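As a toy illustration of this "sort once, re-use" idea (plain Python with made-up key names and per-comparison costs, not PostgreSQL code), the reordering could look like:

```python
def cost_ordering(group_keys, sort_cost):
    """Done once per grouping rel: order keys cheapest-to-compare first.
    The sort is stable, so equal-cost keys keep their query order."""
    return sorted(group_keys, key=lambda k: sort_cost[k])

def reorder_for_path(saved_order, path_prefix):
    """Per candidate input path: pull the keys the path is already
    sorted on to the front, re-use the saved ordering for the tail."""
    prefix = [k for k in path_prefix if k in saved_order]
    return prefix + [k for k in saved_order if k not in prefix]

# Hypothetical GROUP BY keys with made-up per-comparison costs.
cost = {"id": 1.0, "flag": 1.0, "name": 10.0, "note": 12.0}
saved = cost_ordering(["name", "id", "note", "flag"], cost)

# A path sorted on (name, id): keep that prefix, cost-order the rest.
print(reorder_for_path(saved, ["name", "id"]))  # ['name', 'id', 'flag', 'note']
# No useful input ordering: fall back to the pure cost ordering.
print(reorder_for_path(saved, []))              # ['id', 'flag', 'name', 'note']
```

This avoids repeated cost estimation: the catalog-lookup-dependent part runs once, and each per-path reordering is a cheap list manipulation.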

4) I think we can do some optimizations when enable_incremental_sort
== off. Then in get_useful_group_keys_orderings() we should only deal
with input_path fully matching the group-by clause, and try only full
match of group-by output to the required order.

------
Regards,
Alexander Korotkov

#99Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#98)
Re: POC: GROUP BY optimization

On 21/12/2023 17:53, Alexander Korotkov wrote:

On Sun, Oct 1, 2023 at 11:45 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

New version of the patch. Fixed minor inconsistencies and rebased onto
current master.

Thank you (and other authors) for working on this subject. Indeed,
GROUP BY clauses are order-agnostic. Reordering them into the most
suitable order could give significant query performance benefits. I
went through the thread: I see significant work has been already made
on this patch, the code is quite polished.

Maybe, but the issues mentioned in [1] are still not resolved. That is
the only reason why this thread hasn't been active.

I'd like to make some notes.
1) As already mentioned, there is clearly a repetitive pattern for the
code following after get_useful_group_keys_orderings() calls. I think
it would be good to extract it into a separate function. Please, do
this as a separate patch coming before the group-by patch. That would
simplify the review.

Yeah, these parts of the code are a bit different. I will try to
factor out a common routine.

2) I wonder what planning overhead this patch could introduce? Could
you try to measure the worst case? What if we have a table with a lot
of indexes and a long list of group-by clauses partially matching
every index. This should give us an understanding of whether we need
a separate GUC to control this feature.

Ok.

3) I see that get_useful_group_keys_orderings() makes 3 calls to
get_cheapest_group_keys_order() function. Each time
get_cheapest_group_keys_order() performs the cost estimate and
reorders the free keys. However, cost estimation implies the system
catalog lookups (that is quite expensive). I wonder if we could
change the algorithm. Could we just sort the group-by keys by cost
once, save this ordering and then just re-use it. So, every time we
need to reorder a group by, we can just pull the required keys to the
top and use saved ordering for the rest. I also wonder if we could do
this once for add_paths_to_grouping_rel() and
create_partial_grouping_paths() calls. So, it probably should be
somewhere in create_ordinary_grouping_paths().

Thanks for the idea!

4) I think we can do some optimizations when enable_incremental_sort
== off. Then in get_useful_group_keys_orderings() we should only deal
with input_path fully matching the group-by clause, and try only full
match of group-by output to the required order.

Oh, this was designed before incremental sort was invented. We will
see what we can do here.

[1]: /messages/by-id/60610df1-c32f-ebdf-e58c-7a664431f452@enterprisedb.com

--
regards,
Andrei Lepikhov
Postgres Professional

#100Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#99)
Re: POC: GROUP BY optimization

On Tue, Dec 26, 2023 at 1:37 PM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 21/12/2023 17:53, Alexander Korotkov wrote:

On Sun, Oct 1, 2023 at 11:45 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

New version of the patch. Fixed minor inconsistencies and rebased onto
current master.

Thank you (and other authors) for working on this subject. Indeed,
GROUP BY clauses are order-agnostic. Reordering them into the most
suitable order could give significant query performance benefits. I
went through the thread: I see significant work has been already made
on this patch, the code is quite polished.

Maybe, but the issues mentioned in [1] are still not resolved. That is
the only reason why this thread hasn't been active.

Yes, this makes sense. I have a couple of points on this subject.
1) The patch reorders GROUP BY items not only to make comparison
cheaper but also to match the ordering of input paths and to match the
ORDER BY clause. Thus, even if we leave aside for now sorting GROUP
BY items by their cost, the patch will remain valuable.
2) An accurate estimate of the sorting cost is quite a difficult task.
What if we make a simple rule of thumb that sorting integers and
floats is cheaper than sorting numerics and strings with collation C,
which in turn is cheaper than sorting collation-aware strings
(probably more groups)? Within each class, we could keep the original
order of items.
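A rough sketch of that rule of thumb (the type classes and their ranks below are illustrative assumptions of mine, not anything taken from the patch):

```python
# Assumed comparison-cost classes, per the rule of thumb above:
# machine types < numerics / C-collation strings < collation-aware strings.
TYPE_RANK = {"int": 0, "float": 0, "numeric": 1, "text_c": 1, "text_coll": 2}

def rough_reorder(keys):
    """Stable sort by type-class rank only: cheap comparisons first,
    original query order preserved within each class."""
    return sorted(keys, key=lambda kv: TYPE_RANK[kv[1]])

group_by = [("name", "text_coll"), ("id", "int"),
            ("code", "text_c"), ("qty", "float")]
print(rough_reorder(group_by))
# [('id', 'int'), ('qty', 'float'), ('code', 'text_c'), ('name', 'text_coll')]
```

The point is that no per-platform cost numbers are needed, only a coarse ranking of type classes, and ties never shuffle the user-specified order.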

------
Regards,
Alexander Korotkov

#101Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alexander Korotkov (#100)
Re: POC: GROUP BY optimization

Alexander Korotkov <aekorotkov@gmail.com> writes:

2) An accurate estimate of the sorting cost is quite a difficult task.

Indeed.

What if we make a simple rule of thumb that sorting integers and
floats is cheaper than sorting numerics and strings with collation C,
which in turn is cheaper than sorting collation-aware strings
(probably more groups)? Within each class, we could keep the original
order of items.

I think it's a fool's errand to even try to separate different sort
column orderings by cost. We simply do not have sufficiently accurate
cost information. The previous patch in this thread got reverted because
of that (well, also some implementation issues, but mostly that), and
nothing has happened to make me think that another try will fare any
better.

regards, tom lane

#102Alexander Korotkov
aekorotkov@gmail.com
In reply to: Tom Lane (#101)
Re: POC: GROUP BY optimization

On Wed, Dec 27, 2023 at 5:23 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Alexander Korotkov <aekorotkov@gmail.com> writes:

2) An accurate estimate of the sorting cost is quite a difficult task.

Indeed.

What if we make a simple rule of thumb that sorting integers and
floats is cheaper than sorting numerics and strings with collation C,
which in turn is cheaper than sorting collation-aware strings
(probably more groups)? Within each class, we could keep the original
order of items.

I think it's a fool's errand to even try to separate different sort
column orderings by cost. We simply do not have sufficiently accurate
cost information. The previous patch in this thread got reverted because
of that (well, also some implementation issues, but mostly that), and
nothing has happened to make me think that another try will fare any
better.

If there is a choice of what to compare first: 8-byte integers or
possibly-toasted collation-aware texts, then the answer is pretty
evident to me. For sure, there are cases where this choice is wrong.
But even if all the integers appear to be the same, the penalty isn't
that large.

Besides sorting column orderings by cost, this patch also tries to
match GROUP BY pathkeys to input pathkeys and ORDER BY pathkeys. Do
you think there is a chance for the second part if we leave the cost
part aside?

------
Regards,
Alexander Korotkov

#103Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alexander Korotkov (#102)
Re: POC: GROUP BY optimization

Alexander Korotkov <aekorotkov@gmail.com> writes:

On Wed, Dec 27, 2023 at 5:23 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think it's a fool's errand to even try to separate different sort
column orderings by cost.

Besides sorting column orderings by cost, this patch also tries to
match GROUP BY pathkeys to input pathkeys and ORDER BY pathkeys. Do
you think there is a chance for the second part if we leave the cost
part aside?

I think it's definitely reasonable to try to match up available
orderings, because that doesn't really require fine distinctions
of cost: either it matches or it doesn't. Eliminating a sort step
entirely is clearly a win. (Incremental sort complicates this though.
I doubt our cost model for incremental sorts is any good either, so
I am not eager to rely on that more heavily.)
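The "either it matches or it doesn't" test is essentially a prefix count, as in the planner's pathkeys_count_contained_in(); a simplified Python analogue (an illustration only, not the actual C code):

```python
def count_presorted(required, provided):
    """Count how many leading keys of `required` the `provided`
    ordering already satisfies. A full match means the sort can be
    eliminated; a partial match is where incremental sort could help."""
    n = 0
    for req, got in zip(required, provided):
        if req != got:
            break  # first mismatch ends the usable prefix
        n += 1
    return n

print(count_presorted(["a", "b", "c"], ["a", "b", "c"]))  # 3: no sort needed
print(count_presorted(["a", "b", "c"], ["a", "x"]))       # 1: presorted on "a"
```

No fine-grained cost comparison is involved: the result is a discrete prefix length, which is why this part of the optimization is on firmer ground than cost-based key reordering.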

regards, tom lane

#104Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#102)
Re: POC: GROUP BY optimization

On 27/12/2023 11:15, Alexander Korotkov wrote:

On Wed, Dec 27, 2023 at 5:23 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Alexander Korotkov <aekorotkov@gmail.com> writes:

2) An accurate estimate of the sorting cost is quite a difficult task.

Indeed.

What if we make a simple rule of thumb that sorting integers and
floats is cheaper than sorting numerics and strings with collation C,
which in turn is cheaper than sorting collation-aware strings
(probably more groups)? Within each class, we could keep the original
order of items.

I think it's a fool's errand to even try to separate different sort
column orderings by cost. We simply do not have sufficiently accurate
cost information. The previous patch in this thread got reverted because
of that (well, also some implementation issues, but mostly that), and
nothing has happened to make me think that another try will fare any
better.

To be clear: in [1], I mentioned we can perform micro-benchmarks and
structure the costs of operators. At least for fixed-length operators, it
is relatively easy. So, the main blocker here is an accurate prediction of
ndistinct for different combinations of columns. Does it make sense to
continue to design the feature in the direction of turning on choosing
between different sort column orderings if we have extended statistics
on the columns?

[1]: /messages/by-id/e3602ccb-e643-2e79-ed2c-1175a80533a1@postgrespro.ru

--
regards,
Andrei Lepikhov
Postgres Professional

#105Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andrei Lepikhov (#104)
Re: POC: GROUP BY optimization

Andrei Lepikhov <a.lepikhov@postgrespro.ru> writes:

To be clear. In [1], I mentioned we can perform micro-benchmarks and
structure costs of operators. At least for fixed-length operators, it is
relatively easy.

I repeat what I said: this is a fool's errand. You will not get
trustworthy results even for the cases you measured, let alone
all the rest. I'd go as far as to say I would not believe your
microbenchmarks, because they would only apply for one platform,
compiler, backend build, phase of the moon, etc.

regards, tom lane

#106Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#105)
1 attachment(s)
Re: POC: GROUP BY optimization

On 27/12/2023 12:07, Tom Lane wrote:

Andrei Lepikhov <a.lepikhov@postgrespro.ru> writes:

To be clear. In [1], I mentioned we can perform micro-benchmarks and
structure costs of operators. At least for fixed-length operators, it is
relatively easy.

I repeat what I said: this is a fool's errand. You will not get
trustworthy results even for the cases you measured, let alone
all the rest. I'd go as far as to say I would not believe your
microbenchmarks, because they would only apply for one platform,
compiler, backend build, phase of the moon, etc.

Thanks for the explanation.
I removed all the cost-related code. It still needs to be finished; I
will smooth the code further and rewrite the regression tests - many of
them look silly without cost-dependent reorderings. Also, Alexander's
remarks still need to be implemented. But even as it is, it already
works well. Look:

Preliminaries:
CREATE TABLE t(x int, y int, z text, w int);
INSERT INTO t SELECT gs%100,gs%100, 'abc' || gs%10, gs
FROM generate_series(1,10000) AS gs;
CREATE INDEX abc ON t(x,y);
ANALYZE t;
SET enable_hashagg = 'off';

This patch eliminates unneeded Sort operation:
explain SELECT x,y FROM t GROUP BY (x,y);
explain SELECT x,y FROM t GROUP BY (y,x);

Engages incremental sort:
explain SELECT x,y FROM t GROUP BY (x,y,z,w);
explain SELECT x,y FROM t GROUP BY (z,y,w,x);
explain SELECT x,y FROM t GROUP BY (w,z,x,y);
explain SELECT x,y FROM t GROUP BY (w,x,z,y);

Works with subqueries:
explain SELECT x,y
FROM (SELECT * FROM t ORDER BY x,y,w,z) AS q1
GROUP BY (w,x,z,y);
explain SELECT x,y
FROM (SELECT * FROM t ORDER BY x,y,w,z LIMIT 100) AS q1
GROUP BY (w,x,z,y);

But matching against an ORDER BY clause doesn't work:

DROP INDEX abc;
explain SELECT x,w,z FROM t GROUP BY (w,x,z) ORDER BY (x,z,w);

I think the reason is that the sort_pathkeys and group_pathkeys are
physically different structures, and we can't just compare pointers here.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

0001-Explore-alternative-orderings-of-group-by-pathkeys-d-20231228.patch (text/plain, charset=UTF-8)
From 6183555c2c56d7cbbc44f213f6c5bc4cbcaef1ec Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Wed, 13 Sep 2023 11:20:03 +0700
Subject: [PATCH] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations, or avoid them entirely, if we synchronize the order of GROUP BY
clauses with the ORDER BY clause or with the sort order that comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 224 +++++++++
 src/backend/optimizer/plan/planner.c          | 464 ++++++++++--------
 src/backend/utils/adt/selfuncs.c              |  38 +-
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   3 +
 src/test/regress/expected/aggregates.out      | 235 +++++++++
 .../regress/expected/incremental_sort.out     |  20 +-
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  99 ++++
 src/test/regress/sql/incremental_sort.sql     |   2 +-
 13 files changed, 912 insertions(+), 210 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e2..07edd4f38e 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index fdb60aaa8d..38c1c85472 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -28,6 +28,8 @@
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +352,175 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* abort on first mismatch */
+			break;
+
+		/* find the GROUP BY clause covering this pathkey */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to minimize the sort cost
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * There are other potentially interesting orderings (e.g. it might be best to
+ * match the first ORDER BY key, order the remaining keys differently and then
+ * rely on the incremental sort to fix this), but we ignore those for now. To
+ * make this work we'd have to pretty much generate all possible permutations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * We need not worry about grouping sets here, because those use more
+	 * complex logic to decide the ordering, and we bailed out above.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 */
+	if (path_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(path_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, but
+		 * we still want to keep the keys reordered to path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to match the ORDER BY clause as much as
+	 * possible, to minimize the cost of any remaining sort.
+	 */
+	if (root->sort_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/* keep the group keys reordered to match the ORDER BY clause */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2110,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering matches the grouping
+ * keys only after reordering, not directly.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2172,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2210,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 6f45efde21..fc774bfce6 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6839,90 +6840,108 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (!is_sorted)
+			Assert(list_length(pathkey_orderings) > 0);
+
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6934,69 +6953,84 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
+				List	   *pathkey_orderings = NIL;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (!is_sorted)
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
 				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+					bool			is_sorted;
+					int				presorted_keys;
+					PathKeyInfo    *info = (PathKeyInfo *) lfirst(lc2);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+					is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+															path->pathkeys,
+															&presorted_keys);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+					if (!is_sorted)
+					{
+						/*
+						 * Try at least sorting the cheapest path and also try
+						 * incrementally sorting any path which is partially
+						 * sorted already (no need to deal with paths which have
+						 * presorted keys when incremental sort is disabled unless
+						 * it's the cheapest input path).
+						 */
+						if (path != partially_grouped_rel->cheapest_total_path &&
+							(presorted_keys == 0 || !enable_incremental_sort))
+							continue;
+
+						/*
+						 * We've no need to consider both a sort and incremental
+						 * sort.  We'll just do a sort if there are no pre-sorted
+						 * keys and an incremental sort when there are presorted
+						 * keys.
+						 */
+						if (presorted_keys == 0 || !enable_incremental_sort)
+							path = (Path *) create_sort_path(root,
+															 grouped_rel,
+															 path,
+															 info->pathkeys,
+															 -1.0);
+						else
+							path = (Path *) create_incremental_sort_path(root,
+																		 grouped_rel,
+																		 path,
+																		 info->pathkeys,
+																		 presorted_keys,
+																		 -1.0);
+					}
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7199,66 +7233,85 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool			is_sorted;
+				int				presorted_keys;
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_total_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7267,67 +7320,86 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (!is_sorted)
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+				bool		is_sorted;
+				int			presorted_keys;
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				is_sorted = pathkeys_count_contained_in(info->pathkeys,
+														path->pathkeys,
+														&presorted_keys);
+
+				if (!is_sorted)
+				{
+					/*
+					 * Try at least sorting the cheapest path and also try
+					 * incrementally sorting any path which is partially sorted
+					 * already (no need to deal with paths which have presorted
+					 * keys when incremental sort is disabled unless it's the
+					 * cheapest input path).
+					 */
+					if (path != cheapest_partial_path &&
+						(presorted_keys == 0 || !enable_incremental_sort))
+						continue;
+
+					/*
+					 * We've no need to consider both a sort and incremental sort.
+					 * We'll just do a sort if there are no presorted keys and an
+					 * incremental sort when there are presorted keys.
+					 */
+					if (presorted_keys == 0 || !enable_incremental_sort)
+						path = (Path *) create_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 info->pathkeys,
+														 -1.0);
+					else
+						path = (Path *) create_incremental_sort_path(root,
+																	 partially_grouped_rel,
+																	 path,
+																	 info->pathkeys,
+																	 presorted_keys,
+																	 -1.0);
+				}
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7441,6 +7513,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e11d022827..661c5554f0 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3386,11 +3386,28 @@ double
 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 					List **pgset, EstimationInfo *estinfo)
 {
-	List	   *varinfos = NIL;
+	return estimate_num_groups_incremental(root, groupExprs,
+										   input_rows, pgset, estinfo,
+										   NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ *		An estimate_num_groups variant, optimized for cases that are adding the
+ *		expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+								double input_rows,
+								List **pgset, EstimationInfo *estinfo,
+								List **cache_varinfos, int prevNExprs)
+{
+	List	   *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
 	double		srf_multiplier = 1.0;
 	double		numdistinct;
 	ListCell   *l;
-	int			i;
+	int			i,
+				j;
 
 	/* Zero the estinfo output parameter, if non-NULL */
 	if (estinfo != NULL)
@@ -3421,7 +3438,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	 */
 	numdistinct = 1.0;
 
-	i = 0;
+	i = j = 0;
 	foreach(l, groupExprs)
 	{
 		Node	   *groupexpr = (Node *) lfirst(l);
@@ -3430,6 +3447,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* was done on previous call */
+		if (cache_varinfos && j++ < prevNExprs)
+		{
+			if (pgset)
+				i++;			/* to keep in sync with lines below */
+			continue;
+		}
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -3499,7 +3524,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		if (varshere == NIL)
 		{
 			if (contain_volatile_functions(groupexpr))
+			{
+				if (cache_varinfos)
+					*cache_varinfos = varinfos;
 				return input_rows;
+			}
 			continue;
 		}
 
@@ -3516,6 +3545,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		}
 	}
 
+	if (cache_varinfos)
+		*cache_varinfos = varinfos;
+
 	/*
 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
 	 * list.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 9f59440526..c0db58757a 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1039,6 +1039,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b2809c711a..dc82fceb05 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index ed85dc7414..6e639c0494 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9e7408c7ec..cbc17a082f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,8 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index d8271da4d1..268f38fa42 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2715,6 +2715,241 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+VACUUM btg;
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Sort
+         Sort Key: p, v
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p
+   ->  Sort
+         Sort Key: v, p
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: v, p, c
+   ->  Sort
+         Sort Key: v, p, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: v, p, d, c
+   ->  Sort
+         Sort Key: v, p, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+          QUERY PLAN          
+------------------------------
+ GroupAggregate
+   Group Key: p, v, d, c
+   ->  Sort
+         Sort Key: p, v, d, c
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, d, e
+   ->  Sort
+         Sort Key: p, d, e
+         ->  Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+         QUERY PLAN          
+-----------------------------
+ GroupAggregate
+   Group Key: p, e, d
+   ->  Sort
+         Sort Key: p, e, d
+         ->  Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+                   QUERY PLAN                   
+------------------------------------------------
+ GroupAggregate
+   Group Key: p, v
+   ->  Index Only Scan using btg_p_v_idx on btg
+(3 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c
+   ->  Incremental Sort
+         Sort Key: p, v, c
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
+   Group Key: p, v, c, d
+   ->  Incremental Sort
+         Sort Key: p, v, c, d
+         Presorted Key: p, v
+         ->  Index Scan using btg_p_v_idx on btg
+(6 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 7fdb685313..35d697ec29 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1426,24 +1426,26 @@ set parallel_setup_cost = 0;
 set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 set enable_incremental_sort = off;
 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
-                      QUERY PLAN                      
-------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
  Limit
    ->  Sort
          Sort Key: a, b, (sum(c))
-         ->  Finalize HashAggregate
+         ->  Finalize GroupAggregate
                Group Key: a, b
-               ->  Gather
+               ->  Gather Merge
                      Workers Planned: 2
-                     ->  Partial HashAggregate
-                           Group Key: a, b
-                           ->  Parallel Seq Scan on t
-(10 rows)
+                     ->  Sort
+                           Sort Key: a, b
+                           ->  Partial HashAggregate
+                                 Group Key: a, b
+                                 ->  Parallel Seq Scan on t
+(12 rows)
 
 set enable_incremental_sort = on;
 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf8..9be7aca2b8 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 75c78be640..5c4e768825 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1172,6 +1172,105 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+SELECT
+	i AS id,
+	i/2 AS p,
+	format('%60s', i%2) AS v,
+	i/4 AS c,
+	i/8 AS d,
+	(random() * (10000/8))::int as e --the same as d but no correlation with p
+	INTO btg
+FROM
+	generate_series(1, 10000) i;
+
+VACUUM btg;
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+VACUUM btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index ab471bdfff..70f311a6f2 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
 set max_parallel_workers_per_gather = 2;
 
 create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
 create index on t (a);
 analyze t;
 
-- 
2.43.0

#107Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#106)
Re: POC: GROUP BY optimization

On Thu, Dec 28, 2023 at 10:22 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

But arrangement with an ORDER BY clause doesn't work:

DROP INDEX abc;
explain SELECT x,w,z FROM t GROUP BY (w,x,z) ORDER BY (x,z,w);

I think the reason is that the sort_pathkeys and group_pathkeys are
physically different structures, and we can't just compare pointers here.

I haven't yet looked into the code. But this looks strange to me.
Somehow, optimizer currently matches index pathkeys to ORDER BY
pathkeys. If GROUP BY pathkeys could be matched to index pathkeys,
then it should be possible to match them to ORDER BY pathkeys too.

------
Regards,
Alexander Korotkov

#108Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#107)
Re: POC: GROUP BY optimization

On 28/12/2023 18:29, Alexander Korotkov wrote:

On Thu, Dec 28, 2023 at 10:22 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

But arrangement with an ORDER BY clause doesn't work:

DROP INDEX abc;
explain SELECT x,w,z FROM t GROUP BY (w,x,z) ORDER BY (x,z,w);

I think the reason is that the sort_pathkeys and group_pathkeys are
physically different structures, and we can't just compare pointers here.

I haven't yet looked into the code. But this looks strange to me.
Somehow, optimizer currently matches index pathkeys to ORDER BY
pathkeys. If GROUP BY pathkeys could be matched to index pathkeys,
then it should be possible to match them to ORDER BY pathkeys too.

Oh, I found the mistake: I'm used to writing multi-column GROUP BY and
ORDER BY lists in round brackets. For a grouping list the brackets
change nothing, but ORDER BY treats the parenthesized list as a single
WholeRowVar, and that breaks the group-by arrangement. Look:
explain (COSTS OFF) SELECT relname,reltuples FROM pg_class
GROUP BY relname,reltuples ORDER BY reltuples,relname;

 Group
   Group Key: reltuples, relname
   ->  Sort
         Sort Key: reltuples, relname
         ->  Seq Scan on pg_class
But:
explain (COSTS OFF) SELECT relname,reltuples FROM pg_class
GROUP BY relname,reltuples ORDER BY (reltuples,relname);

 Sort
   Sort Key: (ROW(reltuples, relname))
   ->  Group
         Group Key: relname, reltuples
         ->  Sort
               Sort Key: relname, reltuples
               ->  Seq Scan on pg_class
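
For the record, the same composite-key behavior reproduces on the test table from upthread. This is only a sketch; the exact plan shapes depend on statistics:

```sql
CREATE TABLE t (x int, w int, z int);

-- Bare column lists: the planner is free to arrange the GROUP BY keys
-- to match the ORDER BY, so one sort can serve both.
EXPLAIN (COSTS OFF)
SELECT x, w, z FROM t GROUP BY w, x, z ORDER BY x, z, w;

-- Parenthesized list: ORDER BY sees a single ROW(x, z, w) expression,
-- so the grouping order cannot be matched to it and an extra sort
-- appears on top.
EXPLAIN (COSTS OFF)
SELECT x, w, z FROM t GROUP BY w, x, z ORDER BY (x, z, w);
```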

So, let's continue to work.

--
regards,
Andrei Lepikhov
Postgres Professional

#109Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#98)
2 attachment(s)
Re: POC: GROUP BY optimization

Here is a new version of the GROUP BY optimization without the sort model.

On 21/12/2023 17:53, Alexander Korotkov wrote:

I'd like to make some notes.

1) As already mentioned, there is clearly a repetitive pattern for the
code following after get_useful_group_keys_orderings() calls. I think
it would be good to extract it into a separate function. Please, do
this as a separate patch coming before the group-by patch. That would
simplify the review.

Done. See patch 0001-*. Unfortunately, extracting the whole loop isn't
practical, because it would bloat the routine's interface.

2) I wonder what planning overhead this patch could introduce? Could
you try to measure the worst case? What if we have a table with a lot
of indexes and a long list of group-by clauses partially patching
every index. This should give us an understanding on whether we need
a separate GUC to control this feature.

In the current implementation I don't anticipate any significant overhead.
The GUC is needed here to let users keep their own ordering and to
disable the feature in case of problems.
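
For illustration, a minimal sketch of the opt-out path using the GUC from the attached patch, reusing the btg table and its (p, v) index from the regression test:

```sql
-- With reordering enabled (the default) the planner may group by (p, v)
-- to reuse the index ordering, even though the query says GROUP BY v, p.
EXPLAIN (COSTS OFF)
SELECT count(*) FROM btg GROUP BY v, p;

-- Disabling the GUC keeps the grouping keys exactly as written.
SET enable_group_by_reordering = off;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM btg GROUP BY v, p;
RESET enable_group_by_reordering;
```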

4) I think we can do some optimizations when enable_incremental_sort
== off. Then in get_useful_group_keys_orderings() we should only deal
with input_path fully matching the group-by clause, and try only full
match of group-by output to the required order.

Hm, does that really make sense in the current implementation?

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

0001-Generalize-common-code-of-adding-sort-before-generat.patch (text/plain)
From ea068e221a754a463142575f6e0360d3118475f8 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 9 Jan 2024 12:00:27 +0700
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths.

---
 src/backend/optimizer/plan/planner.c | 209 ++++++++-------------------
 1 file changed, 61 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b675..fcf647940e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,52 @@ done:
 	return parallel_workers;
 }
 
+static Path *
+try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
+					Path *path, Path *cheapest_path)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try
+		 * incrementally sorting any path which is partially sorted
+		 * already (no need to deal with paths which have presorted
+		 * keys when incremental sort is disabled unless it's the
+		 * cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort.
+		 * We'll just do a sort if there are no presorted keys and an
+		 * incremental sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 grouped_rel,
+											 path,
+											 root->group_pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 grouped_rel,
+														 path,
+														 root->group_pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+	return path;
+}
+
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6886,11 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = try_presorting(root, grouped_rel, path, cheapest_path);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6947,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
-
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
 
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = try_presorting(root, grouped_rel, path,
+									  partially_grouped_rel->cheapest_total_path);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7178,12 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = try_presorting(root, partially_grouped_rel, path,
+								  cheapest_total_path);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7214,12 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = try_presorting(root, partially_grouped_rel, path,
+								  cheapest_partial_path);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.43.0

0002-Explore-alternative-orderings-of-group-by-pathkeys-d.patch (text/plain)
From 886227bd33a368df1b564121506842be39d31f3d Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 9 Jan 2024 12:32:15 +0700
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 222 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 214 +++++++++++------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   3 +
 src/test/regress/expected/aggregates.out      | 132 +++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  47 ++++
 10 files changed, 586 insertions(+), 69 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd..7dd14d0a43 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71..8bc4eb9f03 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -28,6 +28,8 @@
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +352,173 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* abort on the first mismatch */
+			break;
+
+		/* find the grouping clause covering this pathkey */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible)
+ *
+ * There are other potentially interesting orderings; just to note a few:
+ * 1. It might be best to match the first ORDER BY key, order the remaining keys
+ * differently, and then rely on the incremental sort to fix this.
+ * 2. Place fixed-length integer columns in front of text/bytea columns.
+ * 3. Rearrange columns according to ndistinct estimations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * We must not do this when there are grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 */
+	if (path_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(path_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, but
+		 * we still want to keep the keys reordered to path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2108,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering does not match the
+ * grouping keys directly, but may do so after the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2170,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2208,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fcf647940e..90b0f5b6c5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6811,12 +6812,12 @@ done:
 
 static Path *
 try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
-					Path *path, Path *cheapest_path)
+					Path *path, Path *cheapest_path, PathKeyInfo *info)
 {
 	bool		is_sorted;
 	int			presorted_keys;
 
-	is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+	is_sorted = pathkeys_count_contained_in(info->pathkeys,
 											path->pathkeys,
 											&presorted_keys);
 
@@ -6842,13 +6843,13 @@ try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
 			path = (Path *) create_sort_path(root,
 											 grouped_rel,
 											 path,
-											 root->group_pathkeys,
+											 info->pathkeys,
 											 -1.0);
 		else
 			path = (Path *) create_incremental_sort_path(root,
 														 grouped_rel,
 														 path,
-														 root->group_pathkeys,
+														 info->pathkeys,
 														 presorted_keys,
 														 -1.0);
 	}
@@ -6885,56 +6886,74 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
+
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
+
+			Assert(list_length(pathkey_orderings) > 0);
+
+			foreach(lc2, pathkey_orderings)
+ 			{
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			path = try_presorting(root, grouped_rel, path, cheapest_path);
+				path = try_presorting(root, grouped_rel, path, cheapest_path, info);
 
-			if (path == NULL)
+				if (path == NULL)
 				continue;
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6946,35 +6965,55 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
 
-				path = try_presorting(root, grouped_rel, path,
-									  partially_grouped_rel->cheapest_total_path);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (path == NULL)
-					continue;
+				Assert(list_length(pathkey_orderings) > 0);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+ 				{
+					PathKeyInfo    *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = try_presorting(root, grouped_rel, path,
+										  partially_grouped_rel->cheapest_total_path,
+										  info);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7177,34 +7216,53 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_total_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+ 			{
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_total_path, info);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
+				else
+					add_path(partially_grouped_rel, (Path *)
 						 create_group_path(root,
 										   partially_grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   NIL,
 										   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7213,34 +7271,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_partial_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_partial_path, info);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7354,6 +7432,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2..7fe58518d7 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b2809c711a..dc82fceb05 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa..0a251984c1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f..e80156c49f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,8 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1af..423c8ec3b6 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,138 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf8..9be7aca2b8 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad5..b9fcceedd7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,53 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
-- 
2.43.0

#110vignesh C
vignesh21@gmail.com
In reply to: Andrei Lepikhov (#109)
Re: POC: GROUP BY optimization

On Tue, 9 Jan 2024 at 14:31, Andrei Lepikhov <a.lepikhov@postgrespro.ru> wrote:

Here is a new version of the GROUP-BY optimization without the sort model.

On 21/12/2023 17:53, Alexander Korotkov wrote:

I'd like to make some notes.

1) As already mentioned, there is clearly a repetitive pattern for the
code following after get_useful_group_keys_orderings() calls. I think
it would be good to extract it into a separate function. Please, do
this as a separate patch coming before the group-by patch. That would
simplify the review.

Done. See patch 0001-*. Unfortunately, extracting the whole loop isn't
practical, because it bloats the interface of the routine.

2) I wonder what planning overhead this patch could introduce? Could
you try to measure the worst case? What if we have a table with a lot
of indexes and a long list of group-by clauses partially matching
every index. This should give us an understanding of whether we need
a separate GUC to control this feature.

In the current implementation I don't anticipate any significant overhead.
The GUC is needed here to let users stick to their own ordering and to
disable the feature in case of problems.

4) I think we can do some optimizations when enable_incremental_sort
== off. Then in get_useful_group_keys_orderings() we should only deal
with input_path fully matching the group-by clause, and try only full
match of group-by output to the required order.

Hm, does it really make sense in the current implementation?

CFBot shows the following errors at [1] with:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c: In function
‘estimate_num_groups’:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3389:9: warning:
implicit declaration of function ‘estimate_num_groups_incremental’
[-Wimplicit-function-declaration]
[08:33:28.813] 3389 | return estimate_num_groups_incremental(root, groupExprs,
[08:33:28.813] | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c: At top level:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3400:1: warning: no
previous prototype for ‘estimate_num_groups_incremental’
[-Wmissing-prototypes]
[08:33:28.813] 3400 | estimate_num_groups_incremental(PlannerInfo
*root, List *groupExprs,
[08:33:28.813] | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3400:1: error:
conflicting types for ‘estimate_num_groups_incremental’
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3389:9: note:
previous implicit declaration of ‘estimate_num_groups_incremental’ was
here
[08:33:28.813] 3389 | return estimate_num_groups_incremental(root, groupExprs,

[1]: https://cirrus-ci.com/task/5074942069309440

Regards,
Vignesh

#111Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: vignesh C (#110)
1 attachment(s)
Re: POC: GROUP BY optimization

On 9/1/2024 16:45, vignesh C wrote:

On Tue, 9 Jan 2024 at 14:31, Andrei Lepikhov <a.lepikhov@postgrespro.ru> wrote:

Here is a new version of the GROUP-BY optimization without the sort model.

On 21/12/2023 17:53, Alexander Korotkov wrote:

I'd like to make some notes.

1) As already mentioned, there is clearly a repetitive pattern for the
code following after get_useful_group_keys_orderings() calls. I think
it would be good to extract it into a separate function. Please, do
this as a separate patch coming before the group-by patch. That would
simplify the review.

Done. See patch 0001-*. Unfortunately, extracting the whole loop isn't
practical, because it bloats the interface of the routine.

2) I wonder what planning overhead this patch could introduce? Could
you try to measure the worst case? What if we have a table with a lot
of indexes and a long list of group-by clauses partially matching
every index. This should give us an understanding of whether we need
a separate GUC to control this feature.

In the current implementation I don't anticipate any significant overhead.
The GUC is needed here to let users stick to their own ordering and to
disable the feature in case of problems.

4) I think we can do some optimizations when enable_incremental_sort
== off. Then in get_useful_group_keys_orderings() we should only deal
with input_path fully matching the group-by clause, and try only full
match of group-by output to the required order.

Hm, does it really make sense in the current implementation?

CFBot shows the following errors at [1] with:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c: In function
‘estimate_num_groups’:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3389:9: warning:
implicit declaration of function ‘estimate_num_groups_incremental’
[-Wimplicit-function-declaration]
[08:33:28.813] 3389 | return estimate_num_groups_incremental(root, groupExprs,
[08:33:28.813] | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c: At top level:
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3400:1: warning: no
previous prototype for ‘estimate_num_groups_incremental’
[-Wmissing-prototypes]
[08:33:28.813] 3400 | estimate_num_groups_incremental(PlannerInfo
*root, List *groupExprs,
[08:33:28.813] | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3400:1: error:
conflicting types for ‘estimate_num_groups_incremental’
[08:33:28.813] ../src/backend/utils/adt/selfuncs.c:3389:9: note:
previous implicit declaration of ‘estimate_num_groups_incremental’ was
here
[08:33:28.813] 3389 | return estimate_num_groups_incremental(root, groupExprs,

Hmm, I don't see this old code in these patches. Resending 0002-* because
of trailing spaces.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

0002-Explore-alternative-orderings-of-group-by-pathkeys-d-20240109.patchtext/plain; charset=UTF-8; name=0002-Explore-alternative-orderings-of-group-by-pathkeys-d-20240109.patchDownload
From 45cfff5731b81c0df2af00f5e4212fc598f6a231 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 9 Jan 2024 12:32:15 +0700
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But a Group Agg
has so far compared keys strictly in the order specified in the query. This
commit explores alternative orderings of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 222 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 214 +++++++++++------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   3 +
 src/test/regress/expected/aggregates.out      | 132 +++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  47 ++++
 10 files changed, 586 insertions(+), 69 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd..7dd14d0a43 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71..8bc4eb9f03 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -28,6 +28,8 @@
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +352,173 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * The function returns new lists (pathkeys and clauses); the original
+ * GROUP BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* no matching GROUP BY key - done with the prefix */
+			break;
+
+		/* find the grouping clause matching this pathkey, if any */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clauses do not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible)
+ *
+ * There are other potentially interesting orderings. Just to name a few:
+ * 1. It might be best to match the first ORDER BY key, order the remaining
+ * keys differently and then rely on incremental sort to fix this.
+ * 2. Place fixed-length integer columns in front of text/bytea columns.
+ * 3. Rearrange columns according to ndistinct estimates.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * Grouping sets were already rejected above, so we need not worry here
+	 * about their more complex logic for deciding the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 */
+	if (path_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(path_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, but
+		 * we still want to keep the keys reordered to path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2108,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with ordering not matching grouping
+ * keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2170,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2208,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fcf647940e..50453c2470 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6811,12 +6812,12 @@ done:
 
 static Path *
 try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
-					Path *path, Path *cheapest_path)
+					Path *path, Path *cheapest_path, PathKeyInfo *info)
 {
 	bool		is_sorted;
 	int			presorted_keys;
 
-	is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+	is_sorted = pathkeys_count_contained_in(info->pathkeys,
 											path->pathkeys,
 											&presorted_keys);
 
@@ -6842,13 +6843,13 @@ try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
 			path = (Path *) create_sort_path(root,
 											 grouped_rel,
 											 path,
-											 root->group_pathkeys,
+											 info->pathkeys,
 											 -1.0);
 		else
 			path = (Path *) create_incremental_sort_path(root,
 														 grouped_rel,
 														 path,
-														 root->group_pathkeys,
+														 info->pathkeys,
 														 presorted_keys,
 														 -1.0);
 	}
@@ -6885,56 +6886,74 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, grouped_rel, path, cheapest_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
+			foreach(lc2, pathkey_orderings)
 			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = try_presorting(root, grouped_rel, path, cheapest_path, info);
+
+				if (path == NULL)
+					continue;
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6946,35 +6965,55 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
 
-				path = try_presorting(root, grouped_rel, path,
-									  partially_grouped_rel->cheapest_total_path);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (path == NULL)
-					continue;
+				Assert(list_length(pathkey_orderings) > 0);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo    *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = try_presorting(root, grouped_rel, path,
+										  partially_grouped_rel->cheapest_total_path,
+										  info);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7177,34 +7216,53 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_total_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_total_path, info);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
+				else
+					add_path(partially_grouped_rel, (Path *)
 						 create_group_path(root,
 										   partially_grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   NIL,
 										   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7213,34 +7271,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_partial_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_partial_path, info);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7354,6 +7432,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2..7fe58518d7 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b2809c711a..dc82fceb05 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa..0a251984c1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f..e80156c49f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,8 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1af..423c8ec3b6 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,138 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf8..9be7aca2b8 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad5..b9fcceedd7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,53 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
-- 
2.43.0

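(The performance argument behind reordering by number of distinct values, from the
start of this thread, is that a lexicographic sort comparator only examines later
columns when all earlier ones tie, so a near-unique leading column means most
comparisons stop after the first column. A rough illustration of that effect — a
simplified Python sketch with a hypothetical comparison counter, not the patch's
cost model:

```python
import functools
import random

def count_column_touches(rows):
    """Sort rows lexicographically and count how many individual column
    comparisons the comparator performs; later columns are only examined
    when all earlier columns compare equal."""
    touches = 0

    def cmp_rows(a, b):
        nonlocal touches
        for x, y in zip(a, b):
            touches += 1
            if x != y:
                return -1 if x < y else 1
        return 0

    sorted(rows, key=functools.cmp_to_key(cmp_rows))
    return touches

random.seed(0)
# Column 'a' is nearly unique, column 'b' has only two distinct values.
rows = [(random.randrange(100000), random.randrange(2)) for _ in range(2000)]

unique_first = count_column_touches(rows)                      # order (a, b)
unique_last = count_column_touches([(b, a) for a, b in rows])  # order (b, a)
assert unique_first < unique_last
```

With the near-unique column first, ties on the leading column are rare and the
comparator almost never looks at the second column; with the two-valued column
first, roughly half of all comparisons fall through to the second column.)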
#112Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Andrei Lepikhov (#111)
Re: POC: GROUP BY optimization

Hi, Andrei!

Hmm, I don't see this old code in these patches. Resending 0002-* because
of trailing spaces.

AFAIK, cfbot does not look for older versions of patchset parts in previous
messages. So for it to run correctly, a new version of the whole patchset
should be sent, even if most of the patches are unchanged.

Regards,
Pavel Borisov

#113Alexander Korotkov
aekorotkov@gmail.com
In reply to: Pavel Borisov (#112)
2 attachment(s)
Re: POC: GROUP BY optimization

Hi!

On Tue, Jan 9, 2024 at 1:14 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

Hmm, I don't see this old code in these patches. Resend 0002-* because
of trailing spaces.

AFAIK, cfbot does not seek old versions of patchset parts in previous messages. So for it to run correctly, a new version of the whole patchset should be sent even if most patches are unchanged.

Please find the revised patchset with some refactoring and comment
improvements from me. I'll continue looking into this.
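
In outline, the input-pathkeys case of get_useful_group_keys_orderings() moves
any GROUP BY keys that can form a contiguous prefix of the input path's sort
order to the front, keeping the rest in their original order; the associated
SortGroupClauses are permuted the same way. A simplified Python sketch of that
idea (key names stand in for PathKey/SortGroupClause pairs; this is not the C
implementation):

```python
def reorder_group_keys(group_keys, input_pathkeys):
    """Reorder GROUP BY keys so that keys matching a prefix of the input
    path's pathkeys come first; remaining keys keep their relative order.
    Returns (reordered keys, length of the matched prefix)."""
    prefix = []
    remaining = list(group_keys)
    for pk in input_pathkeys:
        if pk not in remaining:
            break  # pathkeys are only useful as a contiguous prefix
        prefix.append(pk)
        remaining.remove(pk)
    return prefix + remaining, len(prefix)

# Mirrors the regression test: an index on (x, y) with GROUP BY z, y, w, x.
keys, n_presorted = reorder_group_keys(['z', 'y', 'w', 'x'], ['x', 'y'])
assert keys == ['x', 'y', 'z', 'w']   # matches "Group Key: x, y, z, w"
assert n_presorted == 2               # matches "Presorted Key: x, y"
```

With two keys already presorted, the planner can put an Incremental Sort
rather than a full Sort on top of the index scan, which is exactly what the
expected plans in aggregates.out show.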

------
Regards,
Alexander Korotkov

Attachments:

0001-Generalize-common-code-of-adding-sort-befor-20240111.patchapplication/octet-stream; name=0001-Generalize-common-code-of-adding-sort-befor-20240111.patchDownload
From 58b80541aea56299c572612ddf1d3a16bfb4e866 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:27:58 +0200
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths

Extract the repetitive code pattern into a new function make_ordered_path().

Discussion: https://postgr.es/m/CAPpHfdtzaVa7S4onKy3YvttF2rrH5hQNHx9HtcSTLbpjx%2BMJ%2Bw%40mail.gmail.com
Author: Andrei Lepikhov
---
 src/backend/optimizer/plan/planner.c | 228 ++++++++++-----------------
 1 file changed, 80 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b6753..014b179c3f0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,58 @@ done:
 	return parallel_workers;
 }
 
+/*
+ * make_ordered_path
+ *		Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *		return NULL if it doesn't make sense to generate an ordered path in
+ *		this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+				  Path *cheapest_path, List *pathkeys)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try incrementally
+		 * sorting any path which is partially sorted already (no need to deal
+		 * with paths which have presorted keys when incremental sort is
+		 * disabled unless it's the cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort. We'll
+		 * just do a sort if there are no presorted keys and an incremental
+		 * sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 rel,
+											 path,
+											 pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 rel,
+														 path,
+														 pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+
+	return path;
+}
+
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6892,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 grouped_rel,
+									 path,
+									 cheapest_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6957,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
-
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = make_ordered_path(root,
+										 grouped_rel,
+										 path,
+										 partially_grouped_rel->cheapest_total_path,
+										 root->group_pathkeys);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7191,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_total_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7230,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_partial_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.39.3 (Apple Git-145)

0002-Explore-alternative-orderings-of-group-by-p-20240111.patchapplication/octet-stream; name=0002-Explore-alternative-orderings-of-group-by-p-20240111.patchDownload
From afe12f9eb23859809a881d86a8a18c5a6e3dc50b Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:26:49 +0200
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations, or avoid them entirely, by synchronizing the order of the
GROUP BY clauses with the ORDER BY clause or with the sort order coming from
the underlying query tree. Grouping does not imply any ordering, so the keys
can be compared in arbitrary order, and a Hash Agg leverages this. But for
Group Agg we have simply compared keys in the order specified in the query.
This commit explores alternative orderings of the keys, trying to find a
cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher
up in the query, and using the same ordering may allow us to use an
incremental sort, or even to eliminate the sort entirely.

The patch always retains the ordering specified in the query as one of the
candidates, on the assumption that the user may have additional insight.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.

Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Author: Andrey V. Lepikhov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry Dolgov
Reviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong Yu
Reviewed-by: Tom Lane, Alexander Korotkov
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 215 ++++++++++++
 src/backend/optimizer/plan/planner.c          | 316 +++++++++++-------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   2 +
 src/test/regress/expected/aggregates.out      | 132 ++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  47 +++
 src/tools/pgindent/typedefs.list              |   1 +
 11 files changed, 625 insertions(+), 125 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd4..7dd14d0a43b 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the EC was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71e..f4b7dcac214 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,12 +22,15 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +353,165 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * The function returns new lists (pathkeys and clauses); the original GROUP
+ * BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause *sgc;
+
+		/*
+		 * Pathkeys are built in a way that allows simply comparing pointers.
+		 * Give up if we can't find the matching pointer.
+		 */
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/*
+	 * Grouping sets have their own, more complex logic to decide the ordering.
+	 */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the path's ordering as possible.  Then, thanks to
+	 * incremental sort, we can get this sort as cheaply as possible.
+	 */
+	if (path->pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+										   &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(root->sort_pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2101,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering matches the grouping
+ * keys only after reordering, not in the query's original key order.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2163,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2201,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 014b179c3f0..239b799a6b2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6891,103 +6892,135 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 grouped_rel,
-									 path,
-									 cheapest_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
-		}
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-		/*
-		 * Instead of operating directly on the input relation, we can
-		 * consider finalizing a partially aggregated path.
-		 */
-		if (partially_grouped_rel != NULL)
-		{
-			foreach(lc, partially_grouped_rel->pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
 				path = make_ordered_path(root,
 										 grouped_rel,
 										 path,
-										 partially_grouped_rel->cheapest_total_path,
-										 root->group_pathkeys);
-
+										 cheapest_path,
+										 info->pathkeys);
 				if (path == NULL)
 					continue;
 
-				if (parse->hasAggs)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 AGGSPLIT_SIMPLE,
+											 info->clauses,
 											 havingQual,
-											 agg_final_costs,
+											 agg_costs,
 											 dNumGroups));
-				else
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
+
+		/*
+		 * Instead of operating directly on the input relation, we can
+		 * consider finalizing a partially aggregated path.
+		 */
+		if (partially_grouped_rel != NULL)
+		{
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				ListCell   *lc2;
+				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = make_ordered_path(root,
+											 grouped_rel,
+											 path,
+											 partially_grouped_rel->cheapest_total_path,
+											 info->pathkeys);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+
+				}
 			}
 		}
 	}
@@ -7190,37 +7223,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_total_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
-						 create_agg_path(root,
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
 										 partially_grouped_rel,
 										 path,
-										 partially_grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
-										 NIL,
-										 agg_partial_costs,
-										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+										 cheapest_total_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
+							 create_agg_path(root,
+											 partially_grouped_rel,
+											 path,
+											 partially_grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 info->clauses,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7229,37 +7279,55 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_partial_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   root->processed_groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
+										 partially_grouped_rel,
+										 path,
+										 cheapest_partial_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_agg_path(root,
+													 partially_grouped_rel,
+													 path,
+													 partially_grouped_rel->reltarget,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 info->clauses,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialPartialGroups));
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_group_path(root,
+													   partially_grouped_rel,
+													   path,
+													   info->clauses,
+													   NIL,
+													   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7373,6 +7441,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2b..7fe58518d7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b2809c711a1..dc82fceb050 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa6..137da178dc7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f3..0e8a9c94ba6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1afb..423c8ec3b6b 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,138 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..9be7aca2b8a 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad55..b9fcceedd7d 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,53 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5fd46b7bd1f..9aca748f422 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4039,3 +4039,4 @@ cb_tablespace_mapping
 manifest_data
 manifest_writer
 rfile
+PathKeyInfo
-- 
2.39.3 (Apple Git-145)
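
As a companion note for reviewers, the prefix counting performed by
pathkeys_useful_for_grouping() in the patch above can be modeled with plain
string keys (an illustrative sketch only; the real code compares PathKey
pointers via list_member_ptr):

```c
#include <assert.h>
#include <string.h>

/*
 * Count leading pathkeys that have a matching group key; stop at the
 * first miss, since later pathkeys can't help evaluate the grouping.
 */
static int
useful_prefix_for_grouping(const char **pathkeys, int npathkeys,
                           const char **group_keys, int ngroup)
{
    int n;

    for (n = 0; n < npathkeys; n++)
    {
        int found = 0;

        for (int i = 0; i < ngroup; i++)
            if (strcmp(pathkeys[n], group_keys[i]) == 0)
                found = 1;
        if (!found)
            break;
    }
    return n;
}
```

With pathkeys (a, b, c) and group keys (a, b, e) this yields 2, so the path's
(a, b) prefix is kept and an incremental sort can produce (a, b, e).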

#114Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#113)
1 attachment(s)
Re: POC: GROUP BY optimization

On 11/1/2024 18:30, Alexander Korotkov wrote:

Hi!

On Tue, Jan 9, 2024 at 1:14 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

Hmm, I don't see this old code in these patches. Resend 0002-* because
of trailing spaces.

AFAIK, cfbot does not look for older versions of patchset parts in previous messages. So for it to run correctly, a new version of the whole patchset should be sent even if most patches are unchanged.

Please, find the revised patchset with some refactoring and comments
improvement from me. I'll continue looking into this.

The patch looks better now, thanks to your refactoring.
I propose additional comments and tests to make the code more
understandable (see the attachment).
I intended to look into this part of the code further, but the tests show a
behavioural difference between PostgreSQL v15 and v16, which leads to
changes in the code of this feature.
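
As a quick illustration for anyone skimming the thread, the core of
group_keys_reorder_by_pathkeys() in the patch can be sketched in Python
(illustrative only, not PostgreSQL code: real pathkeys are compared by
pointer and looked up via sortgroupref; plain strings stand in for them
here):

```python
def reorder_group_keys_by_pathkeys(pathkeys, group_keys):
    """Reorder GROUP BY keys to put those matching the input ordering first.

    Sketch of the idea behind group_keys_reorder_by_pathkeys(): walk the
    input path's pathkeys, collect the prefix that has a matching GROUP BY
    key, then append the remaining group keys (treated as not sorted).
    Returns (reordered_group_keys, number_of_matched_keys).
    """
    group_set = set(group_keys)
    prefix = []
    for pk in pathkeys:
        # Once we hit a pathkey without a matching GROUP BY key, the rest
        # of the input ordering is useless for grouping.
        if pk not in group_set:
            break
        prefix.append(pk)
    remaining = [k for k in group_keys if k not in prefix]
    return prefix + remaining, len(prefix)


# With input ordering (a, b, c) and GROUP BY (b, a, e), the matched prefix
# is (a, b), so the reordered keys are (a, b, e) with 2 presorted keys,
# and an incremental sort can finish the job.
print(reorder_group_keys_by_pathkeys(['a', 'b', 'c'], ['b', 'a', 'e']))
```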

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

details.txt (text/plain; charset=UTF-8)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f4b7dcac21..5aac6d6677 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -397,6 +397,11 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 			!list_member_ptr(*group_pathkeys, pathkey))
 			break;
 
+		/*
+		 * Since 1349d27, a pathkey coming from an underlying node can be in
+		 * root->group_pathkeys but not in processed_groupClause.  So we
+		 * should be careful here.
+		 */
 		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
 											*group_clauses);
 		if (!sgc)
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 423c8ec3b6..0d46e096e5 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2857,6 +2857,28 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
 (8 rows)
 
 DROP TABLE btg;
+-- The case when the scanning sort order corresponds to the aggregate sort
+-- order but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: c2
+   ->  GroupAggregate
+         Group Key: c1
+         ->  Sort
+               Sort Key: c1, c2
+               ->  Bitmap Heap Scan on t1
+                     Recheck Cond: (c2 < 100)
+                     ->  Bitmap Index Scan on t1_c2_idx
+                           Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index b9fcceedd7..f99167ac9e 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1224,6 +1224,15 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
 
 DROP TABLE btg;
 
+-- The case when the scanning sort order corresponds to the aggregate sort
+-- order but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
#115 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#114)
2 attachment(s)
Re: POC: GROUP BY optimization

On Sat, Jan 13, 2024 at 11:09 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 11/1/2024 18:30, Alexander Korotkov wrote:

On Tue, Jan 9, 2024 at 1:14 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

Hmm, I don't see this old code in these patches. Resend 0002-* because
of trailing spaces.

AFAIK, cfbot does not look for older versions of patchset parts in previous messages. So, for it to run correctly, a new version of the whole patchset should be sent even if most patches are unchanged.

Please, find the revised patchset with some refactoring and comments
improvement from me. I'll continue looking into this.

The patch looks better now, thanks to your refactoring.
I propose additional comments and tests to make the code more
understandable (see the attachment).
I intended to look into this part of the code further, but the tests show a
behavioural difference between PostgreSQL v15 and v16, which leads to
changes in the code of this feature.

Makes sense. I've incorporated your changes into the patchset.
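
To summarize the 0001 refactoring below for readers following along:
make_ordered_path() centralizes the decision of whether to skip a path,
full-sort it, or incremental-sort it. A rough Python sketch of that
decision for a path that is not already fully sorted (illustrative only,
not PostgreSQL code):

```python
def make_ordered_path_decision(is_cheapest, presorted_keys,
                               enable_incremental_sort):
    """Sketch of make_ordered_path()'s handling of a not-fully-sorted
    input path: return 'skip', 'full-sort', or 'incremental-sort'."""
    if not is_cheapest and (presorted_keys == 0
                            or not enable_incremental_sort):
        # Only the cheapest input path is worth a full sort; other paths
        # survive only if partially sorted and incremental sort is enabled.
        return 'skip'
    if presorted_keys == 0 or not enable_incremental_sort:
        return 'full-sort'
    return 'incremental-sort'
```

For example, a non-cheapest path with two presorted keys gets an
incremental sort when enable_incremental_sort is on, but is skipped when
it is off; the cheapest path always gets at least a full sort.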

------
Regards,
Alexander Korotkov

Attachments:

0001-Generalize-common-code-of-adding-sort-befor-20240113.patch (application/octet-stream)
From cc7d0123c4923b2cf10a4ac6805f362da1f6e7e6 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:27:58 +0200
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths

Extract the repetitive code pattern into a new function make_ordered_path().

Discussion: https://postgr.es/m/CAPpHfdtzaVa7S4onKy3YvttF2rrH5hQNHx9HtcSTLbpjx%2BMJ%2Bw%40mail.gmail.com
Author: Andrei Lepikhov
---
 src/backend/optimizer/plan/planner.c | 228 ++++++++++-----------------
 1 file changed, 80 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b6753..014b179c3f0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,58 @@ done:
 	return parallel_workers;
 }
 
+/*
+ * make_ordered_path
+ *		Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *		return NULL if it doesn't make sense to generate an ordered path in
+ *		this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+				  Path *cheapest_path, List *pathkeys)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try incrementally
+		 * sorting any path which is partially sorted already (no need to deal
+		 * with paths which have presorted keys when incremental sort is
+		 * disabled unless it's the cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort. We'll
+		 * just do a sort if there are no presorted keys and an incremental
+		 * sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 rel,
+											 path,
+											 pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 rel,
+														 path,
+														 pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+
+	return path;
+}
+
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6892,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 grouped_rel,
+									 path,
+									 cheapest_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6957,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
-
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = make_ordered_path(root,
+										 grouped_rel,
+										 path,
+										 partially_grouped_rel->cheapest_total_path,
+										 root->group_pathkeys);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7191,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_total_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7230,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_partial_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.39.3 (Apple Git-145)

0002-Explore-alternative-orderings-of-group-by-p-20240113.patch (application/octet-stream)
From 159936c405e51b721e14033af35d886f9948b959 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:26:49 +0200
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them entirely if we synchronize the order of GROUP BY
clauses with the ORDER BY sort clause, or with the sort order that comes from
the underlying query tree. Grouping does not imply any ordering, so we can
compare the keys in arbitrary order; a Hash Agg leverages this. But for Group
Agg, we previously compared keys only in the order specified in the query.
This commit explores alternative orderings of the keys, trying to find a
cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher
up in the query, and using the same ordering may allow either an incremental
sort or eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.

Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Author: Andrey V. Lepikhov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry Dolgov
Reviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong Yu
Reviewed-by: Tom Lane, Alexander Korotkov
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 220 ++++++++++++
 src/backend/optimizer/plan/planner.c          | 316 +++++++++++-------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   2 +
 src/test/regress/expected/aggregates.out      | 154 +++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  56 ++++
 src/tools/pgindent/typedefs.list              |   1 +
 11 files changed, 661 insertions(+), 125 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd4..7dd14d0a43b 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71e..5aac6d66776 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,12 +22,15 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +353,170 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * The function returns new lists (pathkeys and clauses); the original GROUP
+ * BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause *sgc;
+
+		/*
+		 * Pathkeys are built in a way that allows simply comparing pointers.
+		 * Give up if we can't find the matching pointer.
+		 */
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		/*
+		 * Since 1349d27, a pathkey coming from an underlying node can be in
+		 * root->group_pathkeys but not in processed_groupClause.  So we
+		 * should be careful here.
+		 */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/*
+	 * Grouping sets have their own, more complex logic to decide the ordering.
+	 */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the path's ordering as possible.  Then, thanks to
+	 * incremental sort, we would get this sort as cheaply as possible.
+	 */
+	if (path->pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+										   &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2106,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with ordering not matching grouping
+ * keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2168,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2206,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 014b179c3f0..239b799a6b2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6891,103 +6892,135 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 grouped_rel,
-									 path,
-									 cheapest_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
-		}
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-		/*
-		 * Instead of operating directly on the input relation, we can
-		 * consider finalizing a partially aggregated path.
-		 */
-		if (partially_grouped_rel != NULL)
-		{
-			foreach(lc, partially_grouped_rel->pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
 				path = make_ordered_path(root,
 										 grouped_rel,
 										 path,
-										 partially_grouped_rel->cheapest_total_path,
-										 root->group_pathkeys);
-
+										 cheapest_path,
+										 info->pathkeys);
 				if (path == NULL)
 					continue;
 
-				if (parse->hasAggs)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 AGGSPLIT_SIMPLE,
+											 info->clauses,
 											 havingQual,
-											 agg_final_costs,
+											 agg_costs,
 											 dNumGroups));
-				else
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
+
+		/*
+		 * Instead of operating directly on the input relation, we can
+		 * consider finalizing a partially aggregated path.
+		 */
+		if (partially_grouped_rel != NULL)
+		{
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				ListCell   *lc2;
+				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = make_ordered_path(root,
+											 grouped_rel,
+											 path,
+											 partially_grouped_rel->cheapest_total_path,
+											 info->pathkeys);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+
+				}
 			}
 		}
 	}
@@ -7190,37 +7223,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_total_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
-						 create_agg_path(root,
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
 										 partially_grouped_rel,
 										 path,
-										 partially_grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
-										 NIL,
-										 agg_partial_costs,
-										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+										 cheapest_total_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
+							 create_agg_path(root,
+											 partially_grouped_rel,
+											 path,
+											 partially_grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 info->clauses,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7229,37 +7279,55 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_partial_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   root->processed_groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
+										 partially_grouped_rel,
+										 path,
+										 cheapest_partial_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_agg_path(root,
+													 partially_grouped_rel,
+													 path,
+													 partially_grouped_rel->reltarget,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 info->clauses,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialPartialGroups));
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_group_path(root,
+													   partially_grouped_rel,
+													   path,
+													   info->clauses,
+													   NIL,
+													   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7373,6 +7441,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2b..7fe58518d7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 835b0e9ba89..da10b43dac3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa6..137da178dc7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f3..0e8a9c94ba6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1afb..0d46e096e5a 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,160 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY clause a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+-- The case when the scan sort order corresponds to the aggregate sort order
+-- but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: c2
+   ->  GroupAggregate
+         Group Key: c1
+         ->  Sort
+               Sort Key: c1, c2
+               ->  Bitmap Heap Scan on t1
+                     Recheck Cond: (c2 < 100)
+                     ->  Bitmap Index Scan on t1_c2_idx
+                           Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..9be7aca2b8a 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad55..f99167ac9ea 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,62 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY clause a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+-- The case when the scan sort order corresponds to the aggregate sort order
+-- but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index f582eb59e7d..e575fa13c63 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4041,3 +4041,4 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
+PathKeyInfo
-- 
2.39.3 (Apple Git-145)

#116Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#115)
1 attachment(s)
Re: POC: GROUP BY optimization

On 13/1/2024 22:00, Alexander Korotkov wrote:

On Sat, Jan 13, 2024 at 11:09 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 11/1/2024 18:30, Alexander Korotkov wrote:

On Tue, Jan 9, 2024 at 1:14 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

Hmm, I don't see this old code in these patches. Resend 0002-* because
of trailing spaces.

AFAIK, cfbot does not seek old versions of patchset parts in previous messages. So for it to run correctly, a new version of the whole patchset should be sent even if most patches are unchanged.

Please, find the revised patchset with some refactoring and comments
improvement from me. I'll continue looking into this.

The patch looks better, thanks to your refactoring.
I propose additional comments and tests to make the code more
understandable (see attachment).
I intended to look into this part of the code more, but the tests show a
difference between PostgreSQL v.15 and v.16, which causes changes in the
code of this feature.

Makes sense. I've incorporated your changes into the patchset.

One more improvement. To underpin the code change:

-               return cur_ec;  /* Match! */
+           {
+               /*
+                * Match!
+                *
+                * Copy the sortref if it wasn't set yet. That may happen if
+                * the ec was constructed from WHERE clause, i.e. it doesn't
+                * have a target reference at all.
+                */
+               if (cur_ec->ec_sortref == 0 && sortref > 0)
+                   cur_ec->ec_sortref = sortref;
+               return cur_ec;
+           }

I propose the test (see attachment). It shows why we introduce this
change: GROUP BY must handle not only pathkeys generated by explicit
sort operators but also planner-generated ones, as in this example,
where they come from a MergeJoin.
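
For intuition, the prefix-matching reordering that group_keys_reorder_by_pathkeys performs can be modeled with a short sketch (illustrative only, not PostgreSQL code: the real function walks PathKey pointers and SortGroupClause lists, and the names below are made up):

```python
def reorder_group_keys(path_keys, group_keys):
    """Pull GROUP BY keys that match a prefix of the input path's sort
    keys to the front; return the new order and the matched-prefix length."""
    matched = []
    for pk in path_keys:
        # Stop at the first path key with no matching GROUP BY key:
        # later path keys cannot help evaluate the grouping.
        if pk not in group_keys:
            break
        matched.append(pk)
    # Remaining GROUP BY keys are appended and treated as unsorted.
    remainder = [gk for gk in group_keys if gk not in matched]
    return matched + remainder, len(matched)

# A path sorted on (x, y) lets GROUP BY w, x, z, y become (x, y, w, z):
# the (x, y) prefix is presorted, incremental sort handles (w, z).
print(reorder_group_keys(["x", "y"], ["w", "x", "z", "y"]))
# -> (['x', 'y', 'w', 'z'], 2)
```

This mirrors the regression test above, where GROUP BY w,x,z,y over an index on (x,y) yields "Group Key: x, y, w, z" with "Presorted Key: x, y".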

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

details-2.txt
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 0d46e096e5..ca38e78f21 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2879,6 +2879,37 @@ FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
 (10 rows)
 
 DROP TABLE t1 CASCADE;
+-- Check that the GROUP BY reordering optimization can operate with pathkeys
+-- built by the planner itself, e.g. by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Incremental Sort
+   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
+   Presorted Key: c1.relpages, c1.relname
+   ->  Group
+         Group Key: c1.relpages, c1.relname, c1.reltuples
+         ->  Incremental Sort
+               Sort Key: c1.relpages, c1.relname, c1.reltuples
+               Presorted Key: c1.relpages, c1.relname
+               ->  Merge Join
+                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+                     ->  Sort
+                           Sort Key: c1.relpages, c1.relname
+                           ->  Seq Scan on pg_class c1
+                     ->  Sort
+                           Sort Key: c2.relpages, c2.relname
+                           ->  Seq Scan on pg_class c2
+(16 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index f99167ac9e..cf87b5d5dd 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1233,6 +1233,18 @@ SELECT array_agg(c1 ORDER BY c2),c2
 FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
 DROP TABLE t1 CASCADE;
 
+-- Check that the GROUP BY reordering optimization can operate with pathkeys
+-- built by the planner itself, e.g. by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
#117Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#116)
2 attachment(s)
Re: POC: GROUP BY optimization

On Sun, Jan 14, 2024 at 2:14 PM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 13/1/2024 22:00, Alexander Korotkov wrote:

On Sat, Jan 13, 2024 at 11:09 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 11/1/2024 18:30, Alexander Korotkov wrote:

On Tue, Jan 9, 2024 at 1:14 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

Hmm, I don't see this old code in these patches. Resend 0002-* because
of trailing spaces.

AFAIK, cfbot does not seek old versions of patchset parts in previous messages. So for it to run correctly, a new version of the whole patchset should be sent even if most patches are unchanged.

Please, find the revised patchset with some refactoring and comments
improvement from me. I'll continue looking into this.

The patch looks better, thanks to your refactoring.
I propose additional comments and tests to make the code more
understandable (see attachment).
I intended to look into this part of the code more, but the tests show a
difference between PostgreSQL v.15 and v.16, which causes changes in the
code of this feature.

Makes sense. I've incorporated your changes into the patchset.

One more improvement. To underpin the code change:

-               return cur_ec;  /* Match! */
+           {
+               /*
+                * Match!
+                *
+                * Copy the sortref if it wasn't set yet. That may happen if
+                * the ec was constructed from WHERE clause, i.e. it doesn't
+                * have a target reference at all.
+                */
+               if (cur_ec->ec_sortref == 0 && sortref > 0)
+                   cur_ec->ec_sortref = sortref;
+               return cur_ec;
+           }

I propose the test (see attachment). It shows why we introduce this
change: GROUP BY must handle not only pathkeys generated by explicit
sort operators but also planner-generated ones, as in this example,
where they come from a MergeJoin.

Thank you for providing the test case relevant to this code change.
The revised patch incorporating this change is attached. Now the
patchset looks good to me. I'm going to push it if there are no
objections.

------
Regards,
Alexander Korotkov

Attachments:

0002-Explore-alternative-orderings-of-group-by-p-20240115.patch
From 3ee8c4b602285882af116df6e35f76d4b1972433 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:26:49 +0200
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.

Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Author: Andrey V. Lepikhov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry Dolgov
Reviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong Yu
Reviewed-by: Tom Lane, Alexander Korotkov
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 220 ++++++++++++
 src/backend/optimizer/plan/planner.c          | 316 +++++++++++-------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   2 +
 src/test/regress/expected/aggregates.out      | 185 ++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  68 ++++
 src/tools/pgindent/typedefs.list              |   1 +
 11 files changed, 704 insertions(+), 125 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd4..4bd60a09c69 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That may happen if
+				 * the ec was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71e..5aac6d66776 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,12 +22,15 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +353,170 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause *sgc;
+
+		/*
+		 * Pathkeys are built in a way that allows simply comparing pointers.
+		 * Give up if we can't find the matching pointer.
+		 */
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			break;
+
+		/*
+		 * Since 1349d27, a pathkey coming from an underlying node can be in
+		 * root->group_pathkeys but not in processed_groupClause.  So we
+		 * should be careful here.
+		 */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/*
+	 * Grouping sets have their own, more complex logic to decide the ordering.
+	 */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the path ordering as possible.  Then thanks to
+	 * incremental sort we would get this sort as cheap as possible.
+	 */
+	if (path->pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time
+	 * considering the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+										   &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(root->sort_pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	return infos;
+}
+
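To make the selection logic above easier to follow, here is a standalone sketch (Python, for brevity) that mirrors the control flow; `reorder_by_pathkeys` is a simplified stand-in for `group_keys_reorder_by_pathkeys`, not the real algorithm, and grouping sets are not modeled:

```python
def reorder_by_pathkeys(target, group_keys):
    """Simplified stand-in for group_keys_reorder_by_pathkeys: pull group
    keys matching a prefix of 'target' to the front, keeping the relative
    order of the remaining keys.  Returns (n_matched, reordered_keys)."""
    prefix = []
    for key in target:
        if key not in group_keys:
            break                    # stop at the first non-group pathkey
        prefix.append(key)
    rest = [k for k in group_keys if k not in prefix]
    return len(prefix), prefix + rest

def useful_group_keys_orderings(group_keys, path_keys, sort_keys,
                                enable_reordering=True,
                                enable_incremental_sort=True):
    """Collect candidate GROUP BY orderings, as in the patch: always the
    original order, then a variant matching the input path's ordering and
    a variant matching the ORDER BY pathkeys."""
    infos = [list(group_keys)]       # always keep the original ordering
    if not enable_reordering:
        return infos
    for keys in (path_keys, sort_keys):
        if not keys:
            continue
        n, reordered = reorder_by_pathkeys(keys, group_keys)
        # only worth keeping if we match something, and a partial match
        # requires incremental sort to be usable
        if n > 0 and (enable_incremental_sort or n == len(keys)):
            infos.append(reordered)
    return infos
```

For example, with GROUP BY keys (w,x,z,y) and a path ordered by (x,y), the model yields the original ordering plus (x,y,w,z), matching the incremental-sort plans in the regression tests below.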
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2106,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys may be reordered to benefit from an existing path ordering.
+ * That ordering need not be "complete" and may require incremental sort,
+ * but that's fine.  So we simply count the prefix pathkeys with a matching
+ * group key, and stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines that
+ * the (a,b) pathkeys are useful for grouping, and we might do an incremental
+ * sort to get a path ordered by (a,b,e).
+ *
+ * This logic is needed to retain paths whose ordering does not directly
+ * match the grouping keys, without first reordering the group keys.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
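The prefix-counting rule can be modeled standalone (a Python sketch; list membership stands in for the `list_member_ptr` check on PathKey pointers):

```python
def pathkeys_useful_for_grouping(group_pathkeys, pathkeys):
    """Count the prefix of 'pathkeys' whose members all appear among the
    group pathkeys; counting stops at the first pathkey without a matching
    group key, as in the C function above."""
    if not group_pathkeys or not pathkeys:
        return 0                     # nothing requested, or unordered path
    n = 0
    for pk in pathkeys:
        if pk not in group_pathkeys: # no matching group key, we're done
            break
        n += 1
    return n
```

With pathkeys (a,b,c) and group keys (a,b,e) this returns 2, i.e. the (a,b) prefix is worth keeping for a later incremental sort.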
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2168,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2206,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 014b179c3f0..239b799a6b2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6891,103 +6892,135 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 grouped_rel,
-									 path,
-									 cheapest_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
-		}
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-		/*
-		 * Instead of operating directly on the input relation, we can
-		 * consider finalizing a partially aggregated path.
-		 */
-		if (partially_grouped_rel != NULL)
-		{
-			foreach(lc, partially_grouped_rel->pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
 				path = make_ordered_path(root,
 										 grouped_rel,
 										 path,
-										 partially_grouped_rel->cheapest_total_path,
-										 root->group_pathkeys);
-
+										 cheapest_path,
+										 info->pathkeys);
 				if (path == NULL)
 					continue;
 
-				if (parse->hasAggs)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 AGGSPLIT_SIMPLE,
+											 info->clauses,
 											 havingQual,
-											 agg_final_costs,
+											 agg_costs,
 											 dNumGroups));
-				else
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
+
+		/*
+		 * Instead of operating directly on the input relation, we can
+		 * consider finalizing a partially aggregated path.
+		 */
+		if (partially_grouped_rel != NULL)
+		{
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				ListCell   *lc2;
+				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = make_ordered_path(root,
+											 grouped_rel,
+											 path,
+											 partially_grouped_rel->cheapest_total_path,
+											 info->pathkeys);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+
+				}
 			}
 		}
 	}
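The restructured loop follows a simple shape: save the original path, and for each candidate ordering restore it, sort it if worthwhile, and add the grouped path on top. A toy model of that shape (Python; `orderings` and `make_ordered` are hypothetical stand-ins for `get_useful_group_keys_orderings` and `make_ordered_path`):

```python
def grouping_candidates(paths, orderings, make_ordered):
    """Toy model of the restructured loop: the former single pass over
    input paths becomes a nested pass over (path, ordering) pairs.  The
    original path is saved and restored because make_ordered replaces the
    loop variable with a sorted path (or None when sorting isn't useful)."""
    out = []
    for path in paths:
        path_save = path
        for ordering in orderings(path):
            path = path_save         # restore the unsorted input path
            path = make_ordered(path, ordering)
            if path is None:
                continue             # this path/ordering combo is useless
            out.append((path, ordering))
    return out
```

Each surviving (path, ordering) pair then becomes one AggPath or GroupPath candidate, so add_path() still decides which ordering actually wins on cost.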
@@ -7190,37 +7223,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_total_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
-						 create_agg_path(root,
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
 										 partially_grouped_rel,
 										 path,
-										 partially_grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
-										 NIL,
-										 agg_partial_costs,
-										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+										 cheapest_total_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
+							 create_agg_path(root,
+											 partially_grouped_rel,
+											 path,
+											 partially_grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 info->clauses,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7229,37 +7279,55 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_partial_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   root->processed_groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
+										 partially_grouped_rel,
+										 path,
+										 cheapest_partial_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_agg_path(root,
+													 partially_grouped_rel,
+													 path,
+													 partially_grouped_rel->reltarget,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 info->clauses,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialPartialGroups));
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_group_path(root,
+													   partially_grouped_rel,
+													   path,
+													   info->clauses,
+													   NIL,
+													   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7373,6 +7441,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2b..7fe58518d7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 835b0e9ba89..da10b43dac3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa6..137da178dc7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f3..0e8a9c94ba6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1afb..ca38e78f211 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,191 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without the GROUP BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY clause a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+-- A case where the scan's sort order corresponds to the aggregate's sort
+-- order but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: c2
+   ->  GroupAggregate
+         Group Key: c1
+         ->  Sort
+               Sort Key: c1, c2
+               ->  Bitmap Heap Scan on t1
+                     Recheck Cond: (c2 < 100)
+                     ->  Bitmap Index Scan on t1_c2_idx
+                           Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
+-- Check that the GROUP BY reordering optimization can operate on pathkeys
+-- built by the planner itself, e.g. by a MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Incremental Sort
+   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
+   Presorted Key: c1.relpages, c1.relname
+   ->  Group
+         Group Key: c1.relpages, c1.relname, c1.reltuples
+         ->  Incremental Sort
+               Sort Key: c1.relpages, c1.relname, c1.reltuples
+               Presorted Key: c1.relpages, c1.relname
+               ->  Merge Join
+                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+                     ->  Sort
+                           Sort Key: c1.relpages, c1.relname
+                           ->  Seq Scan on pg_class c1
+                     ->  Sort
+                           Sort Key: c2.relpages, c2.relname
+                           ->  Seq Scan on pg_class c2
+(16 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..9be7aca2b8a 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad55..cf87b5d5ddd 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,74 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without the GROUP BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY clause a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+-- A case where the scan's sort order corresponds to the aggregate's sort
+-- order but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
+-- Check that the GROUP BY reordering optimization can operate on pathkeys
+-- built by the planner itself, e.g. by a MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index f582eb59e7d..e575fa13c63 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4041,3 +4041,4 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
+PathKeyInfo
-- 
2.39.3 (Apple Git-145)

Attachment: 0001-Generalize-common-code-of-adding-sort-befor-20240115.patch (application/octet-stream)
From 2167ada5fc9e2d07b1f20cd840bea409e0e594c9 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:27:58 +0200
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths

Extract the repetitive code pattern into a new function make_ordered_path().

Discussion: https://postgr.es/m/CAPpHfdtzaVa7S4onKy3YvttF2rrH5hQNHx9HtcSTLbpjx%2BMJ%2Bw%40mail.gmail.com
Author: Andrei Lepikhov
---
 src/backend/optimizer/plan/planner.c | 228 ++++++++++-----------------
 1 file changed, 80 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b6753..014b179c3f0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,58 @@ done:
 	return parallel_workers;
 }
 
+/*
+ * make_ordered_path
+ *		Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *		return NULL if it doesn't make sense to generate an ordered path in
+ *		this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+				  Path *cheapest_path, List *pathkeys)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try incrementally
+		 * sorting any path which is partially sorted already (no need to deal
+		 * with paths which have presorted keys when incremental sort is
+		 * disabled unless it's the cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort. We'll
+		 * just do a sort if there are no presorted keys and an incremental
+		 * sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 rel,
+											 path,
+											 pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 rel,
+														 path,
+														 pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+
+	return path;
+}
+
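The decision tree in make_ordered_path() can be summarized with a small model (Python; it reports which action the planner would take instead of building real Sort/IncrementalSort paths, and a plain prefix comparison approximates pathkeys_count_contained_in):

```python
def make_ordered_path(path_keys, needed, is_cheapest,
                      enable_incremental_sort=True):
    """Model of make_ordered_path's decision: return 'none' (already
    sorted), 'sort' (full sort), 'incsort' (incremental sort), or None
    when the path should be skipped entirely."""
    # count how many leading 'needed' keys the path already provides
    presorted = 0
    for a, b in zip(needed, path_keys):
        if a != b:
            break
        presorted += 1
    if presorted == len(needed):
        return "none"                # already sorted as required
    if not is_cheapest and (presorted == 0 or not enable_incremental_sort):
        return None                  # not worth sorting a non-cheapest path
    if presorted == 0 or not enable_incremental_sort:
        return "sort"                # full sort from scratch
    return "incsort"                 # incremental sort over the prefix
```

So a non-cheapest path is only sorted when it already has a presorted prefix and incremental sort is enabled; otherwise only the cheapest input path gets a full sort.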
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6892,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 grouped_rel,
+									 path,
+									 cheapest_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6957,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
-
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = make_ordered_path(root,
+										 grouped_rel,
+										 path,
+										 partially_grouped_rel->cheapest_total_path,
+										 root->group_pathkeys);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7191,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_total_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7230,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_partial_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.39.3 (Apple Git-145)

#118 Richard Guo
guofenglinux@gmail.com
In reply to: Alexander Korotkov (#117)
Re: POC: GROUP BY optimization

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov <aekorotkov@gmail.com>
wrote:

Thank you for providing the test case relevant for this code change.
The revised patch incorporating this change is attached. Now the
patchset looks good to me. I'm going to push it if there are no
objections.

Seems I'm late for the party. Can we hold for several more days? I'd
like to have a review on this patch.

Thanks
Richard

#119 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Richard Guo (#118)
Re: POC: GROUP BY optimization

On Mon, Jan 15, 2024 at 8:42 AM Richard Guo <guofenglinux@gmail.com> wrote:

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for providing the test case relevant for this code change.
The revised patch incorporating this change is attached. Now the
patchset looks good to me. I'm going to push it if there are no
objections.

Seems I'm late for the party. Can we hold for several more days? I'd
like to have a review on this patch.

Sure, NP. I'll hold it till your review.

------
Regards,
Alexander Korotkov

#120 Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Richard Guo (#118)
Re: POC: GROUP BY optimization

On 15/1/2024 13:42, Richard Guo wrote:

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov <aekorotkov@gmail.com
<mailto:aekorotkov@gmail.com>> wrote:

Thank you for providing the test case relevant for this code change.
The revised patch incorporating this change is attached.  Now the
patchset looks good to me.  I'm going to push it if there are no
objections.

Seems I'm late for the party.  Can we hold for several more days?  I'd
like to have a review on this patch.

Get on board! It looks like this feature needs as much review as
possible (likewise SJE).

--
regards,
Andrei Lepikhov
Postgres Professional

#121 Alena Rybakina
a.rybakina@postgrespro.ru
In reply to: Andrei Lepikhov (#120)
1 attachment(s)
Re: POC: GROUP BY optimization

On 15.01.2024 12:46, Andrei Lepikhov wrote:

On 15/1/2024 13:42, Richard Guo wrote:

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov
<aekorotkov@gmail.com <mailto:aekorotkov@gmail.com>> wrote:

    Thank you for providing the test case relevant for this code change.
    The revised patch incorporating this change is attached. Now the
    patchset looks good to me.  I'm going to push it if there are no
    objections.

Seems I'm late for the party.  Can we hold for several more days?  I'd
like to have a review on this patch.

Get on board! It looks like this feature needs as much review as
possible (likewise SJE).

Hi! Thank you for your work on this issue! I believe this will help the
planner build a more optimal query plan here and therefore speed up
query execution.

I have reviewed the patches and noticed some opportunities for code
refactoring. I have attached a diff file (group_by.diff) to this email.

The changes involve spelling corrections, variable renames, and
factoring out some common parts.

In addition, I have a few questions, since some points in the code
remained unclear to me.

1. I didn't understand why there is a question mark in the comment next
to the enable_group_by_reordering variable in the
src/backend/optimizer/path/pathkeys.c file; I assumed it was a typo and
fixed it in the diff file.

2. Why do we restore the variable (path = path_save) here (in the
add_paths_to_grouping_rel function) if we overwrite it just below? We
could pass path_save as a parameter instead:

foreach(lc2, pathkey_orderings)
{
    PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);

    /* restore the path (we replace it in the loop) */
    path = path_save;

    path = make_ordered_path(root,
                             grouped_rel,
                             path,
                             cheapest_path,
                             info->pathkeys);
    if (path == NULL)
        continue;

--
Regards,
Alena Rybakina
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

group_by.diff (text/x-patch; charset=UTF-8)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5aac6d66776..8be58fa2b0e 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -29,7 +29,7 @@
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
-/* Consider reordering of GROUP BY keys? */
+/* Consider reordering of GROUP BY keys */
 bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
@@ -362,7 +362,7 @@ pathkeys_contained_in(List *keys1, List *keys2)
  *
  * Returns the number of GROUP BY keys with a matching pathkey.
  */
-static int
+static PathKeyInfo *
 group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 							   List **group_clauses,
 							   int num_groupby_pathkeys)
@@ -421,7 +421,16 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
 											*group_clauses);
 
-	return n;
+	if (n > 0 &&
+		(enable_incremental_sort || n == list_length(*group_pathkeys)))
+	{
+		PathKeyInfo *info = makeNode(PathKeyInfo);
+		info->pathkeys = *group_pathkeys;
+		info->clauses = *group_clauses;
+		return info;
+	}
+
+	return NULL;
 }
 
 /*
@@ -436,7 +445,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
  *
  * - the original ordering, as specified by the GROUP BY clause,
  * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys should match the target ORDER BY clause (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -475,20 +484,11 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 	 */
 	if (path->pathkeys)
 	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+		info = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
 										   root->num_groupby_pathkeys);
 
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(path->pathkeys)))
-		{
-			info = makeNode(PathKeyInfo);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
+		if (info)
 			infos = lappend(infos, info);
-		}
 	}
 
 	/*
@@ -497,21 +497,12 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 	 */
 	if (root->sort_pathkeys)
 	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+		info = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
 										   &clauses,
 										   root->num_groupby_pathkeys);
 
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(path->pathkeys)))
-		{
-			info = makeNode(PathKeyInfo);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
+		if (info)
 			infos = lappend(infos, info);
-		}
 	}
 
 	return infos;
@@ -2163,27 +2154,26 @@ truncate_useless_pathkeys(PlannerInfo *root,
 						  RelOptInfo *rel,
 						  List *pathkeys)
 {
-	int			nuseful;
-	int			nuseful2;
+	int			nuseful_pathkeys;
+	int			nuseful_alternative;
+
+	nuseful_pathkeys = pathkeys_useful_for_merging(root, rel, pathkeys);
 
-	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
-	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
-	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
+	if((nuseful_alternative = pathkeys_useful_for_ordering(root, pathkeys)) > nuseful_pathkeys)
+		nuseful_pathkeys = nuseful_alternative;
+	if ((nuseful_alternative = pathkeys_useful_for_grouping(root, pathkeys)) > nuseful_pathkeys)
+		nuseful_pathkeys = nuseful_alternative;
 
 	/*
 	 * Note: not safe to modify input list destructively, but we can avoid
 	 * copying the list if we're not actually going to change it
 	 */
-	if (nuseful == 0)
+	if (nuseful_pathkeys == 0)
 		return NIL;
-	else if (nuseful == list_length(pathkeys))
+	else if (nuseful_pathkeys == list_length(pathkeys))
 		return pathkeys;
 	else
-		return list_copy_head(pathkeys, nuseful);
+		return list_copy_head(pathkeys, nuseful_pathkeys);
 }
 
 /*
#122 Richard Guo
guofenglinux@gmail.com
In reply to: Alexander Korotkov (#119)
Re: POC: GROUP BY optimization

On Mon, Jan 15, 2024 at 3:56 PM Alexander Korotkov <aekorotkov@gmail.com>
wrote:

On Mon, Jan 15, 2024 at 8:42 AM Richard Guo <guofenglinux@gmail.com>
wrote:

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov <aekorotkov@gmail.com>

wrote:

Thank you for providing the test case relevant for this code change.
The revised patch incorporating this change is attached. Now the
patchset looks good to me. I'm going to push it if there are no
objections.

Seems I'm late for the party. Can we hold for several more days? I'd
like to have a review on this patch.

Sure, NP. I'll hold it till your review.

Thanks. Appreciate that.

I have briefly reviewed this patch and here are some comments.

* When trying to match the ordering of GROUP BY to that of ORDER BY in
get_useful_group_keys_orderings, this patch checks against the length of
path->pathkeys. This does not make sense. I guess this is a typo and
the real intention is to check against root->sort_pathkeys.

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -504,7 +504,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path
*path)
                                           root->num_groupby_pathkeys);
        if (n > 0 &&
-           (enable_incremental_sort || n == list_length(path->pathkeys)))
+           (enable_incremental_sort || n ==
list_length(root->sort_pathkeys)))

However, I think this is also incorrect. When incremental sort is
disabled, it is reasonable to consider reordering the GROUP BY keys only
if the number of matching pathkeys is equal to the length of
root->group_pathkeys i.e. if 'n == list_length(root->group_pathkeys)'.

* When the original ordering of GROUP BY keys matches the path's
pathkeys or ORDER BY keys, get_useful_group_keys_orderings would
generate duplicate PathKeyInfos. Consequently, this duplication would
lead to the creation and addition of identical paths multiple times.
This is not great. Consider the query below:

create table t (a int, b int);
create index on t (a, b);
set enable_hashagg to off;

explain select count(*) from t group by a, b;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate (cost=0.15..97.27 rows=226 width=16)
Group Key: a, b
-> Index Only Scan using t_a_b_idx on t (cost=0.15..78.06 rows=2260
width=8)
(3 rows)

The same path with group keys {a, b} is created and added twice.
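A minimal sketch of the deduplication that would avoid this, in
illustrative Python rather than the actual C planner code (the function
and its behavior here are hypothetical, sketched only to show the idea):

```python
def useful_orderings(original, candidates):
    """Collect candidate GROUP BY key orderings, skipping any ordering
    that duplicates the original clause order (or a prior candidate),
    so the same sorted path is not built twice."""
    seen = [tuple(original)]
    result = []
    for ordering in candidates:
        key = tuple(ordering)
        if key not in seen:  # drop duplicates of the original ordering
            seen.append(key)
            result.append(ordering)
    return result

# The reported case: both path->pathkeys and ORDER BY suggest (a, b),
# which equals the original GROUP BY order, so nothing new is added.
print(useful_orderings(["a", "b"], [["a", "b"], ["a", "b"]]))  # []
```

With such a filter, get_useful_group_keys_orderings would return only
genuinely distinct orderings, and identical paths would not be created
and added multiple times.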

* Part of the work performed in this patch overlaps with that of
preprocess_groupclause. They are both trying to adjust the ordering of
the GROUP BY keys to match ORDER BY. I wonder if it would be better to
perform this work only once.

* When reordering the GROUP BY keys, I think the following checks need
some improvements.

+       /*
+        * Since 1349d27 pathkey coming from underlying node can be in the
+        * root->group_pathkeys but not in the processed_groupClause. So, we
+        * should be careful here.
+        */
+       sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+                                           *group_clauses);
+       if (!sgc)
+           /* The grouping clause does not cover this pathkey */
+           break;

I think we need to check or assert 'pathkey->pk_eclass->ec_sortref' is
valid before calling get_sortgroupref_clause_noerr with it. Also, how
can we guarantee that the returned GROUP BY item is sortable? Should we
check that with OidIsValid(sgc->sortop)?
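The guard chain suggested above could look roughly like this, as an
illustrative Python sketch with hypothetical field names mirroring the C
structures (not actual PostgreSQL code):

```python
InvalidOid = 0

def match_group_clause(pathkey, group_clauses):
    """Return the grouping clause matching a pathkey, or None if the
    pathkey carries no sortref, no clause covers it, or the matched
    GROUP BY item is not sortable."""
    sortref = pathkey["ec_sortref"]
    if sortref == 0:
        # No sort/group ref was ever assigned to this eclass.
        return None
    sgc = next((c for c in group_clauses
                if c["tleSortGroupRef"] == sortref), None)
    if sgc is None:
        # The grouping clause does not cover this pathkey.
        return None
    if sgc["sortop"] == InvalidOid:
        # The GROUP BY item is not sortable; reordering can't use it.
        return None
    return sgc
```

All three conditions must hold before the pathkey can participate in the
reordered GROUP BY key list.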

Thanks
Richard

#123 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Richard Guo (#122)
2 attachment(s)
Re: POC: GROUP BY optimization

Hi!

Thank you for your review. The revised patchset is attached.

On Tue, Jan 16, 2024 at 4:48 AM Richard Guo <guofenglinux@gmail.com> wrote:

On Mon, Jan 15, 2024 at 3:56 PM Alexander Korotkov <aekorotkov@gmail.com> wrote:

On Mon, Jan 15, 2024 at 8:42 AM Richard Guo <guofenglinux@gmail.com> wrote:

On Mon, Jan 15, 2024 at 8:20 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for providing the test case relevant for this code change.
The revised patch incorporating this change is attached. Now the
patchset looks good to me. I'm going to push it if there are no
objections.

Seems I'm late for the party. Can we hold for several more days? I'd
like to have a review on this patch.

Sure, NP. I'll hold it till your review.

Thanks. Appreciate that.

I have briefly reviewed this patch and here are some comments.

* When trying to match the ordering of GROUP BY to that of ORDER BY in
get_useful_group_keys_orderings, this patch checks against the length of
path->pathkeys. This does not make sense. I guess this is a typo and
the real intention is to check against root->sort_pathkeys.

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -504,7 +504,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
root->num_groupby_pathkeys);
if (n > 0 &&
-           (enable_incremental_sort || n == list_length(path->pathkeys)))
+           (enable_incremental_sort || n == list_length(root->sort_pathkeys)))

However, I think this is also incorrect. When incremental sort is
disabled, it is reasonable to consider reordering the GROUP BY keys only
if the number of matching pathkeys is equal to the length of
root->group_pathkeys i.e. if 'n == list_length(root->group_pathkeys)'.

Hmm... why should this be list_length(root->group_pathkeys) when
we're targeting root->sort_pathkeys? Anyway, I changed that to
list_length(root->sort_pathkeys).

* When the original ordering of GROUP BY keys matches the path's
pathkeys or ORDER BY keys, get_useful_group_keys_orderings would
generate duplicate PathKeyInfos. Consequently, this duplication would
lead to the creation and addition of identical paths multiple times.
This is not great. Consider the query below:

create table t (a int, b int);
create index on t (a, b);
set enable_hashagg to off;

explain select count(*) from t group by a, b;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate (cost=0.15..97.27 rows=226 width=16)
Group Key: a, b
-> Index Only Scan using t_a_b_idx on t (cost=0.15..78.06 rows=2260 width=8)
(3 rows)

The same path with group keys {a, b} is created and added twice.

I tried to address that.

* Part of the work performed in this patch overlaps with that of
preprocess_groupclause. They are both trying to adjust the ordering of
the GROUP BY keys to match ORDER BY. I wonder if it would be better to
perform this work only once.

Andrei, could you take a look?

* When reordering the GROUP BY keys, I think the following checks need
some improvements.

+       /*
+        * Since 1349d27 pathkey coming from underlying node can be in the
+        * root->group_pathkeys but not in the processed_groupClause. So, we
+        * should be careful here.
+        */
+       sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+                                           *group_clauses);
+       if (!sgc)
+           /* The grouping clause does not cover this pathkey */
+           break;

I think we need to check or assert 'pathkey->pk_eclass->ec_sortref' is
valid before calling get_sortgroupref_clause_noerr with it. Also, how
can we guarantee that the returned GROUP BY item is sortable? Should we
check that with OidIsValid(sgc->sortop)?

Added.

------
Regards,
Alexander Korotkov

Attachments:

0001-Generalize-common-code-of-adding-sort-befor-20240116.patch (application/octet-stream)
From 2167ada5fc9e2d07b1f20cd840bea409e0e594c9 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:27:58 +0200
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths

Extract the repetitive code pattern into a new function make_ordered_path().

Discussion: https://postgr.es/m/CAPpHfdtzaVa7S4onKy3YvttF2rrH5hQNHx9HtcSTLbpjx%2BMJ%2Bw%40mail.gmail.com
Author: Andrei Lepikhov
---
 src/backend/optimizer/plan/planner.c | 228 ++++++++++-----------------
 1 file changed, 80 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b6753..014b179c3f0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,58 @@ done:
 	return parallel_workers;
 }
 
+/*
+ * make_ordered_path
+ *		Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *		return NULL if it doesn't make sense to generate an ordered path in
+ *		this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+				  Path *cheapest_path, List *pathkeys)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try incrementally
+		 * sorting any path which is partially sorted already (no need to deal
+		 * with paths which have presorted keys when incremental sort is
+		 * disabled unless it's the cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort. We'll
+		 * just do a sort if there are no presorted keys and an incremental
+		 * sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 rel,
+											 path,
+											 pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 rel,
+														 path,
+														 pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+
+	return path;
+}
+
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6892,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 grouped_rel,
+									 path,
+									 cheapest_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6957,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
-
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = make_ordered_path(root,
+										 grouped_rel,
+										 path,
+										 partially_grouped_rel->cheapest_total_path,
+										 root->group_pathkeys);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7191,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_total_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7230,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_partial_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.39.3 (Apple Git-145)

0002-Explore-alternative-orderings-of-group-by-p-20240116.patch (application/octet-stream)
From f0c0340555a50415b03d0d684c4cb8422ccdb926 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:26:49 +0200
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.
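The cost intuition here is that tuple comparison short-circuits on the
first differing key, so putting a high-cardinality column first resolves
most comparisons on column one. This can be demonstrated outside the
planner; the following is an illustrative Python measurement, not part
of the patch:

```python
import functools
import random

calls = 0

def cmp_tuples(x, y):
    """Compare tuples column by column, counting column comparisons."""
    global calls
    for a, b in zip(x, y):
        calls += 1
        if a != b:
            return -1 if a < b else 1
    return 0

random.seed(42)
# Column a is nearly unique; column b has only two distinct values.
rows = [(random.randrange(10000), random.randrange(2)) for _ in range(5000)]

calls = 0
sorted(rows, key=functools.cmp_to_key(cmp_tuples))
unique_first = calls    # high-cardinality column compared first

calls = 0
sorted([(b, a) for a, b in rows], key=functools.cmp_to_key(cmp_tuples))
dup_first = calls       # low-cardinality column compared first

# Sorting with the near-unique column first needs fewer
# column comparisons overall.
print(unique_first < dup_first)
```

When the first key is almost always distinct, the comparator rarely
has to look at the second column, which is what reordering GROUP BY
keys by descending number of distinct values exploits.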

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.

Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Author: Andrey V. Lepikhov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry Dolgov
Reviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong Yu
Reviewed-by: Tom Lane, Alexander Korotkov
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 230 +++++++++++++
 src/backend/optimizer/plan/planner.c          | 316 +++++++++++-------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   2 +
 src/test/regress/expected/aggregates.out      | 185 ++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  68 ++++
 src/tools/pgindent/typedefs.list              |   1 +
 11 files changed, 714 insertions(+), 125 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd4..4bd60a09c69 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That may happen if
+				 * the ec was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71e..919d54dd790 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,12 +22,15 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +353,180 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * The function returns new lists (pathkeys and clauses); the original
+ * GROUP BY lists stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause *sgc;
+
+		/*
+		 * Pathkeys are built in a way that allows simply comparing pointers.
+		 * Give up if we can't find the matching pointer.  Also give up if
+		 * there is no sortclause reference for some reason.
+		 */
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey) ||
+			pathkey->pk_eclass->ec_sortref == 0)
+			break;
+
+		/*
+		 * Since 1349d27, a pathkey coming from an underlying node can be in
+		 * root->group_pathkeys but not in processed_groupClause. So, we
+		 * should be careful here.
+		 */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		/*
+		 * Sort group clause should have an ordering operator as long as there
+		 * is an associated pathkey.
+		 */
+		Assert(OidIsValid(sgc->sortop));
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/*
+	 * Grouping sets have their own, more complex logic to decide the ordering.
+	 */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the path's ordering as possible.  Then, thanks to
+	 * incremental sort, we can get this sort as cheaply as possible.
+	 */
+	if (path->pathkeys &&
+		!pathkeys_contained_in(path->pathkeys, root->group_pathkeys))
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 && compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL &&
+			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys &&
+		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+										   &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 && compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL &&
+			(enable_incremental_sort || n == list_length(root->sort_pathkeys)))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2116,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths whose ordering matches the grouping
+ * keys only after reordering, not directly.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2178,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2216,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 014b179c3f0..239b799a6b2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6891,103 +6892,135 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 grouped_rel,
-									 path,
-									 cheapest_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
-		}
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-		/*
-		 * Instead of operating directly on the input relation, we can
-		 * consider finalizing a partially aggregated path.
-		 */
-		if (partially_grouped_rel != NULL)
-		{
-			foreach(lc, partially_grouped_rel->pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
 				path = make_ordered_path(root,
 										 grouped_rel,
 										 path,
-										 partially_grouped_rel->cheapest_total_path,
-										 root->group_pathkeys);
-
+										 cheapest_path,
+										 info->pathkeys);
 				if (path == NULL)
 					continue;
 
-				if (parse->hasAggs)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 AGGSPLIT_SIMPLE,
+											 info->clauses,
 											 havingQual,
-											 agg_final_costs,
+											 agg_costs,
 											 dNumGroups));
-				else
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
+
+		/*
+		 * Instead of operating directly on the input relation, we can
+		 * consider finalizing a partially aggregated path.
+		 */
+		if (partially_grouped_rel != NULL)
+		{
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				ListCell   *lc2;
+				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = make_ordered_path(root,
+											 grouped_rel,
+											 path,
+											 partially_grouped_rel->cheapest_total_path,
+											 info->pathkeys);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+
+				}
 			}
 		}
 	}
@@ -7190,37 +7223,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_total_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
-						 create_agg_path(root,
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
 										 partially_grouped_rel,
 										 path,
-										 partially_grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
-										 NIL,
-										 agg_partial_costs,
-										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+										 cheapest_total_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
+							 create_agg_path(root,
+											 partially_grouped_rel,
+											 path,
+											 partially_grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 info->clauses,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7229,37 +7279,55 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_partial_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   root->processed_groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
+										 partially_grouped_rel,
+										 path,
+										 cheapest_partial_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_agg_path(root,
+													 partially_grouped_rel,
+													 path,
+													 partially_grouped_rel->reltarget,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 info->clauses,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialPartialGroups));
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_group_path(root,
+													   partially_grouped_rel,
+													   path,
+													   info->clauses,
+													   NIL,
+													   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7373,6 +7441,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2b..7fe58518d7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 835b0e9ba89..da10b43dac3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa6..137da178dc7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f3..0e8a9c94ba6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1afb..ca38e78f211 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,191 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reordering columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reordering columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+-- The case when the scan's sort order corresponds to the aggregate sort order
+-- but cannot be found in the group-by list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: c2
+   ->  GroupAggregate
+         Group Key: c1
+         ->  Sort
+               Sort Key: c1, c2
+               ->  Bitmap Heap Scan on t1
+                     Recheck Cond: (c2 < 100)
+                     ->  Bitmap Index Scan on t1_c2_idx
+                           Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
+-- Check that the GROUP BY reordering optimization can operate with pathkeys
+-- built by the planner itself, for example by a MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Incremental Sort
+   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
+   Presorted Key: c1.relpages, c1.relname
+   ->  Group
+         Group Key: c1.relpages, c1.relname, c1.reltuples
+         ->  Incremental Sort
+               Sort Key: c1.relpages, c1.relname, c1.reltuples
+               Presorted Key: c1.relpages, c1.relname
+               ->  Merge Join
+                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+                     ->  Sort
+                           Sort Key: c1.relpages, c1.relname
+                           ->  Seq Scan on pg_class c1
+                     ->  Sort
+                           Sort Key: c2.relpages, c2.relname
+                           ->  Seq Scan on pg_class c2
+(16 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..9be7aca2b8a 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad55..cf87b5d5ddd 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,74 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reordering columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reordering columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY sort a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+-- The case when the scanning sort order corresponds to the aggregate sort
+-- order but cannot be found in the GROUP BY list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
+-- Check that the GROUP BY reordering optimization can operate with pathkeys
+-- built by the planner itself, e.g., by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index f582eb59e7d..e575fa13c63 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4041,3 +4041,4 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
+PathKeyInfo
-- 
2.39.3 (Apple Git-145)

#124Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#123)
1 attachment(s)
Re: POC: GROUP BY optimization

On 16/1/2024 22:05, Alexander Korotkov wrote:

On Tue, Jan 16, 2024 at 4:48 AM Richard Guo <guofenglinux@gmail.com> wrote:

* When trying to match the ordering of GROUP BY to that of ORDER BY in
get_useful_group_keys_orderings, this patch checks against the length of
path->pathkeys. This does not make sense. I guess this is a typo and
the real intention is to check against root->sort_pathkeys.

Thanks! That is really my blunder; a fresh look helps.

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -504,7 +504,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
root->num_groupby_pathkeys);
if (n > 0 &&
-           (enable_incremental_sort || n == list_length(path->pathkeys)))
+           (enable_incremental_sort || n == list_length(root->sort_pathkeys)))

However, I think this is also incorrect. When incremental sort is
disabled, it is reasonable to consider reordering the GROUP BY keys only
if the number of matching pathkeys is equal to the length of
root->group_pathkeys i.e. if 'n == list_length(root->group_pathkeys)'.

Hmm... Why should this be list_length(root->group_pathkeys) while
we're targeting root->sort_pathkeys? For now, I've changed that to
list_length(root->sort_pathkeys).

In the first case, when we try to arrange the GROUP BY keys according to
the underlying pathkeys with incremental sort off, I think it only makes
sense if we can match all of the GROUP BY keys, regardless of whether the
incoming path contains more pathkeys than that. The code and test case
are in step1.txt.

* When the original ordering of GROUP BY keys matches the path's
pathkeys or ORDER BY keys, get_useful_group_keys_orderings would
generate duplicate PathKeyInfos. Consequently, this duplication would
lead to the creation and addition of identical paths multiple times.
This is not great. Consider the query below:

create table t (a int, b int);
create index on t (a, b);
set enable_hashagg to off;

explain select count(*) from t group by a, b;
QUERY PLAN
----------------------------------------------------------------------------------
GroupAggregate (cost=0.15..97.27 rows=226 width=16)
Group Key: a, b
-> Index Only Scan using t_a_b_idx on t (cost=0.15..78.06 rows=2260 width=8)
(3 rows)

The same path with group keys {a, b} is created and added twice.

I tried to address that.

* Part of the work performed in this patch overlaps with that of
preprocess_groupclause. They are both trying to adjust the ordering of
the GROUP BY keys to match ORDER BY. I wonder if it would be better to
perform this work only once.

Andrei, could you take a look.

As I see it, the PathKeyInfo list often contains duplicate pathkeys,
coming from either the sort_pathkeys or the path->pathkeys orderings. So I
propose to check for duplicates each time (see step2.txt in the attachment).

* When reordering the GROUP BY keys, I think the following checks need
some improvements.

+       /*
+        * Since 1349d27 pathkey coming from underlying node can be in the
+        * root->group_pathkeys but not in the processed_groupClause. So, we
+        * should be careful here.
+        */
+       sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+                                           *group_clauses);
+       if (!sgc)
+           /* The grouping clause does not cover this pathkey */
+           break;

I think we need to check or assert 'pathkey->pk_eclass->ec_sortref' is
valid before calling get_sortgroupref_clause_noerr with it. Also, how
can we guarantee that the returned GROUP BY item is sortable? Should we
check that with OidIsValid(sgc->sortop)?

Added.

Reviewed it, looks good.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

step1.txt (text/plain; charset=UTF-8)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 919d54dd79..1095b73dac 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -489,8 +489,9 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
 										   root->num_groupby_pathkeys);
 
-		if (n > 0 && compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL &&
-			(enable_incremental_sort || n == list_length(path->pathkeys)))
+		if (n > 0 &&
+			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(PathKeyInfo);
 			info->pathkeys = pathkeys;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index ca38e78f21..67dd20f375 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2856,6 +2856,23 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
                ->  Index Scan using abc on btg
 (8 rows)
 
+SET enable_incremental_sort = off;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ GroupAggregate
+   Output: y, x, array_agg(DISTINCT w)
+   Group Key: btg.y, btg.x
+   ->  Index Only Scan using idx_y_x_z on public.btg
+         Output: y, x, w
+         Index Cond: (btg.y < 0)
+(6 rows)
+
+RESET enable_incremental_sort;
 DROP TABLE btg;
 -- The case, when scanning sort order correspond to aggregate sort order but
 -- can not be found in the group-by list
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 9880a188d3..524bdfa67d 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1182,7 +1182,6 @@ SELECT balk(hundred) FROM tenk1;
 ROLLBACK;
 
 -- GROUP BY optimization by reorder columns
-
 CREATE TABLE btg AS SELECT
   i % 100 AS x,
   i % 100 AS y,
@@ -1223,8 +1222,11 @@ explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
 explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
 
 SET enable_incremental_sort = off;
-EXPLAIN (VERBOSE, COSTS ON)
-SELECT x,y, array_agg(z) FROM btg WHERE x < 2 GROUP BY y,x;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
 RESET enable_incremental_sort;
 
 DROP TABLE btg;
#125Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Andrei Lepikhov (#124)
1 attachment(s)
Re: POC: GROUP BY optimization

Just forgot the second attachment.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

step2.txt (text/plain; charset=UTF-8)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1095b73dac..b612420547 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -432,6 +432,21 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
+static bool
+duplicated_pathkey_combination(List *infos, List *pathkeys)
+{
+	ListCell *lc;
+
+	foreach (lc, infos)
+	{
+		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+
+		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
+			return true;
+	}
+	return false;
+}
+
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -491,7 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
+			!duplicated_pathkey_combination(infos, pathkeys))
 		{
 			info = makeNode(PathKeyInfo);
 			info->pathkeys = pathkeys;
@@ -514,8 +529,9 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 										   &clauses,
 										   root->num_groupby_pathkeys);
 
-		if (n > 0 && compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)))
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
+			!duplicated_pathkey_combination(infos, pathkeys))
 		{
 			info = makeNode(PathKeyInfo);
 			info->pathkeys = pathkeys;
#126Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#124)
2 attachment(s)
Re: POC: GROUP BY optimization

Hi!

I've applied your changes with minor editing, thank you.

On Thu, Jan 18, 2024 at 11:49 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

* Part of the work performed in this patch overlaps with that of
preprocess_groupclause. They are both trying to adjust the ordering of
the GROUP BY keys to match ORDER BY. I wonder if it would be better to
perform this work only once.

Andrei, could you take a look.

As I see, the PathKeyInfo list often contains duplicated pathkeys,
coming from either sort_pathkeys or path->pathkeys orderings. So, I can
propose to check duplicates each time (see step2.txt in attachment).

Actually, I asked to recheck whether we can cut some part of
preprocess_groupclause(), given that we're reordering the pathkeys
later. It seems we can remove everything except the grouping-set
handling. I've done this in the revised patchset.

I'm going to push this if there are no objections.

------
Regards,
Alexander Korotkov

Attachments:

0002-Explore-alternative-orderings-of-group-by-p-20240119.patch (application/octet-stream)
From e9a6e72777c2e776b480f2d5e0cf8dcf49f273b8 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Fri, 19 Jan 2024 16:05:08 +0200
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow us to use an incremental
sort or even eliminate the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.

Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Author: Andrey V. Lepikhov, Teodor Sigaev
Reviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry Dolgov
Reviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong Yu
Reviewed-by: Tom Lane, Alexander Korotkov
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 252 +++++++++++
 src/backend/optimizer/plan/planner.c          | 424 +++++++++---------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   2 +
 src/test/regress/expected/aggregates.out      | 202 +++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  75 ++++
 src/tools/pgindent/typedefs.list              |   1 +
 11 files changed, 770 insertions(+), 223 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd4..4bd60a09c69 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet.  That may happen if
+				 * the ec was constructed from a WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71e..82ff31273b9 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -22,12 +22,15 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/plannodes.h"
+#include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +353,202 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match the input pathkeys.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause *sgc;
+
+		/*
+		 * Pathkeys are built in a way that allows simply comparing pointers.
+		 * Give up if we can't find the matching pointer.  Also give up if
+		 * there is no sortclause reference for some reason.
+		 */
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey) ||
+			pathkey->pk_eclass->ec_sortref == 0)
+			break;
+
+		/*
+		 * Since 1349d27 pathkey coming from underlying node can be in the
+		 * root->group_pathkeys but not in the processed_groupClause. So, we
+		 * should be careful here.
+		 */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause does not cover this pathkey */
+			break;
+
+		/*
+		 * Sort group clause should have an ordering operator as long as there
+		 * is an associated pathkey.
+		 */
+		Assert(OidIsValid(sgc->sortop));
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * pathkeys_are_duplicate
+ *		Check if the given pathkeys are already contained in the list of
+ *		PathKeyInfos.
+ */
+static bool
+pathkeys_are_duplicate(List *infos, List *pathkeys)
+{
+	ListCell   *lc;
+
+	foreach(lc, infos)
+	{
+		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+
+		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns a list of PathKeyInfo items, each representing an interesting
+ * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
+ * matching order.
+ *
+ * The function considers (and keeps) multiple GROUP BY orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause,
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/*
+	 * Grouping sets have their own, more complex logic to decide the ordering.
+	 */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the path's ordering as possible.  Then, thanks to
+	 * incremental sort, we can get this sort as cheaply as possible.
+	 */
+	if (path->pathkeys &&
+		!pathkeys_contained_in(path->pathkeys, root->group_pathkeys))
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
+			!pathkeys_are_duplicate(infos, pathkeys))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys &&
+		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+	{
+		int			n;
+
+		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
+										   &clauses,
+										   root->num_groupby_pathkeys);
+
+		if (n > 0 &&
+			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
+			!pathkeys_are_duplicate(infos, pathkeys))
+		{
+			info = makeNode(PathKeyInfo);
+			info->pathkeys = pathkeys;
+			info->clauses = clauses;
+
+			infos = lappend(infos, info);
+		}
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2138,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with ordering not matching grouping
+ * keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2200,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 014b179c3f0..2e2458b1284 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -140,7 +140,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *preprocess_groupclause(PlannerInfo *root, List *force);
+static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1423,7 +1423,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = preprocess_groupclause(root, NIL);
+			root->processed_groupClause = list_copy(parse->groupClause);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2144,7 +2144,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = preprocess_groupclause(root, gs->set);
+			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2796,111 +2796,24 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * preprocess_groupclause - do preparatory work on GROUP BY clause
- *
- * The idea here is to adjust the ordering of the GROUP BY elements
- * (which in itself is semantically insignificant) to match ORDER BY,
- * thereby allowing a single sort operation to both implement the ORDER BY
- * requirement and set up for a Unique step that implements GROUP BY.
- *
- * In principle it might be interesting to consider other orderings of the
- * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
- *
- * Note: we need no comparable processing of the distinctClause because
- * the parser already enforced that that matches ORDER BY.
- *
- * Note: we return a fresh List, but its elements are the same
- * SortGroupClauses appearing in parse->groupClause.  This is important
- * because later processing may modify the processed_groupClause list.
- *
- * For grouping sets, the order of items is instead forced to agree with that
- * of the grouping set (and items not in the grouping set are skipped). The
- * work of sorting the order of grouping set elements to match the ORDER BY if
- * possible is done elsewhere.
+ * groupclause_apply_groupingset
+ *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
+ *		not in the grouping set are skipped.
  */
 static List *
-preprocess_groupclause(PlannerInfo *root, List *force)
+groupclause_apply_groupingset(PlannerInfo *root, List *gset)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
-	bool		partial_match;
 	ListCell   *sl;
-	ListCell   *gl;
 
-	/* For grouping sets, we need to force the ordering */
-	if (force)
+	foreach(sl, gset)
 	{
-		foreach(sl, force)
-		{
-			Index		ref = lfirst_int(sl);
-			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		Index		ref = lfirst_int(sl);
+		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
 
-			new_groupclause = lappend(new_groupclause, cl);
-		}
-
-		return new_groupclause;
+		new_groupclause = lappend(new_groupclause, cl);
 	}
-
-	/* If no ORDER BY, nothing useful to do here */
-	if (parse->sortClause == NIL)
-		return list_copy(parse->groupClause);
-
-	/*
-	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
-	 * items, but only as far as we can make a matching prefix.
-	 *
-	 * This code assumes that the sortClause contains no duplicate items.
-	 */
-	foreach(sl, parse->sortClause)
-	{
-		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
-
-		foreach(gl, parse->groupClause)
-		{
-			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
-
-			if (equal(gc, sc))
-			{
-				new_groupclause = lappend(new_groupclause, gc);
-				break;
-			}
-		}
-		if (gl == NULL)
-			break;				/* no match, so stop scanning */
-	}
-
-	/* Did we match all of the ORDER BY list, or just some of it? */
-	partial_match = (sl != NULL);
-
-	/* If no match at all, no point in reordering GROUP BY */
-	if (new_groupclause == NIL)
-		return list_copy(parse->groupClause);
-
-	/*
-	 * Add any remaining GROUP BY items to the new list, but only if we were
-	 * able to make a complete match.  In other words, we only rearrange the
-	 * GROUP BY list if the result is that one list is a prefix of the other
-	 * --- otherwise there's no possibility of a common sort.  Also, give up
-	 * if there are any non-sortable GROUP BY items, since then there's no
-	 * hope anyway.
-	 */
-	foreach(gl, parse->groupClause)
-	{
-		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
-
-		if (list_member_ptr(new_groupclause, gc))
-			continue;			/* it matched an ORDER BY item */
-		if (partial_match)		/* give up, no common sort possible */
-			return list_copy(parse->groupClause);
-		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
-			return list_copy(parse->groupClause);
-		new_groupclause = lappend(new_groupclause, gc);
-	}
-
-	/* Success --- install the rearranged GROUP BY list */
-	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4200,7 +4113,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = preprocess_groupclause(root, gset);
+				rollup->groupClause = groupclause_apply_groupingset(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4389,7 +4302,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = preprocess_groupclause(root, gs->set);
+			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
@@ -6891,103 +6804,135 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 grouped_rel,
-									 path,
-									 cheapest_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
-			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_agg_path(root,
-										 grouped_rel,
-										 path,
-										 grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
-										 havingQual,
-										 agg_costs,
-										 dNumGroups));
-			}
-			else if (parse->groupClause)
+			foreach(lc2, pathkey_orderings)
 			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
-						 create_group_path(root,
-										   grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   havingQual,
-										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
-			}
-		}
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-		/*
-		 * Instead of operating directly on the input relation, we can
-		 * consider finalizing a partially aggregated path.
-		 */
-		if (partially_grouped_rel != NULL)
-		{
-			foreach(lc, partially_grouped_rel->pathlist)
-			{
-				Path	   *path = (Path *) lfirst(lc);
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
 				path = make_ordered_path(root,
 										 grouped_rel,
 										 path,
-										 partially_grouped_rel->cheapest_total_path,
-										 root->group_pathkeys);
-
+										 cheapest_path,
+										 info->pathkeys);
 				if (path == NULL)
 					continue;
 
-				if (parse->hasAggs)
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make
+					 * an AggPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 AGGSPLIT_SIMPLE,
+											 info->clauses,
 											 havingQual,
-											 agg_final_costs,
+											 agg_costs,
 											 dNumGroups));
-				else
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets.
+					 * Make a GroupPath.
+					 */
 					add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
+			}
+		}
 
+		/*
+		 * Instead of operating directly on the input relation, we can
+		 * consider finalizing a partially aggregated path.
+		 */
+		if (partially_grouped_rel != NULL)
+		{
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				ListCell   *lc2;
+				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
+
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root, path);
+
+				Assert(list_length(pathkey_orderings) > 0);
+
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = make_ordered_path(root,
+											 grouped_rel,
+											 path,
+											 partially_grouped_rel->cheapest_total_path,
+											 info->pathkeys);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
+								 create_agg_path(root,
+												 grouped_rel,
+												 path,
+												 grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_FINAL_DESERIAL,
+												 info->clauses,
+												 havingQual,
+												 agg_final_costs,
+												 dNumGroups));
+					else
+						add_path(grouped_rel, (Path *)
+								 create_group_path(root,
+												   grouped_rel,
+												   path,
+												   info->clauses,
+												   havingQual,
+												   dNumGroups));
+
+				}
 			}
 		}
 	}
@@ -7190,37 +7135,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_total_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
-						 create_agg_path(root,
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
 										 partially_grouped_rel,
 										 path,
-										 partially_grouped_rel->reltarget,
-										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
-										 NIL,
-										 agg_partial_costs,
-										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
-						 create_group_path(root,
-										   partially_grouped_rel,
-										   path,
-										   root->processed_groupClause,
-										   NIL,
-										   dNumPartialGroups));
+										 cheapest_total_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
+							 create_agg_path(root,
+											 partially_grouped_rel,
+											 path,
+											 partially_grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_INITIAL_SERIAL,
+											 info->clauses,
+											 NIL,
+											 agg_partial_costs,
+											 dNumPartialGroups));
+				else
+					add_path(partially_grouped_rel, (Path *)
+							 create_group_path(root,
+											   partially_grouped_rel,
+											   path,
+											   info->clauses,
+											   NIL,
+											   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7229,37 +7191,55 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = make_ordered_path(root,
-									 partially_grouped_rel,
-									 path,
-									 cheapest_partial_path,
-									 root->group_pathkeys);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root, path);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_agg_path(root,
-												 partially_grouped_rel,
-												 path,
-												 partially_grouped_rel->reltarget,
-												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
-												 NIL,
-												 agg_partial_costs,
-												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
-								 create_group_path(root,
-												   partially_grouped_rel,
-												   path,
-												   root->processed_groupClause,
-												   NIL,
-												   dNumPartialPartialGroups));
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = make_ordered_path(root,
+										 partially_grouped_rel,
+										 path,
+										 cheapest_partial_path,
+										 info->pathkeys);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_agg_path(root,
+													 partially_grouped_rel,
+													 path,
+													 partially_grouped_rel->reltarget,
+													 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+													 AGGSPLIT_INITIAL_SERIAL,
+													 info->clauses,
+													 NIL,
+													 agg_partial_costs,
+													 dNumPartialPartialGroups));
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
+									 create_group_path(root,
+													   partially_grouped_rel,
+													   path,
+													   info->clauses,
+													   NIL,
+													   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7373,6 +7353,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2b..7fe58518d7d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 835b0e9ba89..da10b43dac3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa6..137da178dc7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines the information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f3..0e8a9c94ba6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,7 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1afb..67dd20f3751 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,208 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+SET enable_incremental_sort = off;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ GroupAggregate
+   Output: y, x, array_agg(DISTINCT w)
+   Group Key: btg.y, btg.x
+   ->  Index Only Scan using idx_y_x_z on public.btg
+         Output: y, x, w
+         Index Cond: (btg.y < 0)
+(6 rows)
+
+RESET enable_incremental_sort;
+DROP TABLE btg;
+-- The case, when scanning sort order correspond to aggregate sort order but
+-- can not be found in the group-by list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: c2
+   ->  GroupAggregate
+         Group Key: c1
+         ->  Sort
+               Sort Key: c1, c2
+               ->  Bitmap Heap Scan on t1
+                     Recheck Cond: (c2 < 100)
+                     ->  Bitmap Index Scan on t1_c2_idx
+                           Index Cond: (c2 < 100)
+(10 rows)
+
+DROP TABLE t1 CASCADE;
+-- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
+-- by planner itself. For example, by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Incremental Sort
+   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
+   Presorted Key: c1.relpages, c1.relname
+   ->  Group
+         Group Key: c1.relpages, c1.relname, c1.reltuples
+         ->  Incremental Sort
+               Sort Key: c1.relpages, c1.relname, c1.reltuples
+               Presorted Key: c1.relpages, c1.relname
+               ->  Merge Join
+                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+                     ->  Sort
+                           Sort Key: c1.relpages, c1.relname
+                           ->  Seq Scan on pg_class c1
+                     ->  Sort
+                           Sort Key: c2.relpages, c2.relname
+                           ->  Seq Scan on pg_class c2
+(16 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..9be7aca2b8a 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad55..524bdfa67d6 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,81 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+SET enable_incremental_sort = off;
+-- The case when the number of incoming subtree path keys is more than
+-- the number of grouping keys.
+CREATE INDEX idx_y_x_z ON btg(y,x,w);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
+RESET enable_incremental_sort;
+
+DROP TABLE btg;
+
+-- The case, when scanning sort order correspond to aggregate sort order but
+-- can not be found in the group-by list
+CREATE TABLE t1 (c1 int PRIMARY KEY, c2 int);
+CREATE UNIQUE INDEX ON t1(c2);
+explain (costs off)
+SELECT array_agg(c1 ORDER BY c2),c2
+FROM t1 WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+DROP TABLE t1 CASCADE;
+
+-- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
+-- by planner itself. For example, by MergeJoin.
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+explain (COSTS OFF)
+SELECT c1.relname,c1.relpages
+FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
+GROUP BY c1.reltuples,c1.relpages,c1.relname
+ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 16421f034cb..97750ddcf8f 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4042,3 +4042,4 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
+PathKeyInfo
-- 
2.39.3 (Apple Git-145)

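For readers following along in the archive: the planner behavior these patches add can be observed interactively with a minimal session. This is a sketch only, assuming a server built with both patches applied; the table, column, and index names mirror the regression test added in `aggregates.sql` above.

```sql
-- Force a sorted grouping path so any reordering is visible in EXPLAIN.
SET enable_hashagg = off;

CREATE TABLE btg AS
  SELECT i % 100 AS x, i % 100 AS y, i AS w
  FROM generate_series(1, 10000) AS i;
CREATE INDEX abc ON btg (x, y);
ANALYZE btg;

-- With enable_group_by_reordering = on (the default), the planner may treat
-- GROUP BY y, x as GROUP BY x, y, matching the (x, y) index ordering and
-- avoiding an explicit Sort node above the Index Only Scan.
EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y, x;

-- Turning the GUC off keeps the user-specified grouping column order.
SET enable_group_by_reordering = off;
EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y, x;
```

With the GUC on, the expected-output file above shows `Group Key: x, y` over an Index Only Scan for both column orders; with it off, the second EXPLAIN should fall back to an explicit sort on the written order.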
Attachment: 0001-Generalize-common-code-of-adding-sort-befor-20240119.patch (application/octet-stream)
From c76a5828ff4925d03ce071658cc4e413e9a3aaf3 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 11 Jan 2024 13:27:58 +0200
Subject: [PATCH 1/2] Generalize common code of adding sort before generation
 of grouping paths

Extract the repetitive code pattern into a new function make_ordered_path().

Discussion: https://postgr.es/m/CAPpHfdtzaVa7S4onKy3YvttF2rrH5hQNHx9HtcSTLbpjx%2BMJ%2Bw%40mail.gmail.com
Author: Andrei Lepikhov
---
 src/backend/optimizer/plan/planner.c | 228 ++++++++++-----------------
 1 file changed, 80 insertions(+), 148 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 667723b6753..014b179c3f0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6809,6 +6809,58 @@ done:
 	return parallel_workers;
 }
 
+/*
+ * make_ordered_path
+ *		Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *		return NULL if it doesn't make sense to generate an ordered path in
+ *		this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+				  Path *cheapest_path, List *pathkeys)
+{
+	bool		is_sorted;
+	int			presorted_keys;
+
+	is_sorted = pathkeys_count_contained_in(pathkeys,
+											path->pathkeys,
+											&presorted_keys);
+
+	if (!is_sorted)
+	{
+		/*
+		 * Try at least sorting the cheapest path and also try incrementally
+		 * sorting any path which is partially sorted already (no need to deal
+		 * with paths which have presorted keys when incremental sort is
+		 * disabled unless it's the cheapest input path).
+		 */
+		if (path != cheapest_path &&
+			(presorted_keys == 0 || !enable_incremental_sort))
+			return NULL;
+
+		/*
+		 * We've no need to consider both a sort and incremental sort. We'll
+		 * just do a sort if there are no presorted keys and an incremental
+		 * sort when there are presorted keys.
+		 */
+		if (presorted_keys == 0 || !enable_incremental_sort)
+			path = (Path *) create_sort_path(root,
+											 rel,
+											 path,
+											 pathkeys,
+											 -1.0);
+		else
+			path = (Path *) create_incremental_sort_path(root,
+														 rel,
+														 path,
+														 pathkeys,
+														 presorted_keys,
+														 -1.0);
+	}
+
+	return path;
+}
+
 /*
  * add_paths_to_grouping_rel
  *
@@ -6840,45 +6892,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 grouped_rel,
+									 path,
+									 cheapest_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			/* Now decide what to stick atop it */
 			if (parse->groupingSets)
@@ -6935,46 +6957,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
 				Path	   *path = (Path *) lfirst(lc);
-				bool		is_sorted;
-				int			presorted_keys;
 
-				is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-														path->pathkeys,
-														&presorted_keys);
-
-				if (!is_sorted)
-				{
-					/*
-					 * Try at least sorting the cheapest path and also try
-					 * incrementally sorting any path which is partially
-					 * sorted already (no need to deal with paths which have
-					 * presorted keys when incremental sort is disabled unless
-					 * it's the cheapest input path).
-					 */
-					if (path != partially_grouped_rel->cheapest_total_path &&
-						(presorted_keys == 0 || !enable_incremental_sort))
-						continue;
+				path = make_ordered_path(root,
+										 grouped_rel,
+										 path,
+										 partially_grouped_rel->cheapest_total_path,
+										 root->group_pathkeys);
 
-					/*
-					 * We've no need to consider both a sort and incremental
-					 * sort.  We'll just do a sort if there are no pre-sorted
-					 * keys and an incremental sort when there are presorted
-					 * keys.
-					 */
-					if (presorted_keys == 0 || !enable_incremental_sort)
-						path = (Path *) create_sort_path(root,
-														 grouped_rel,
-														 path,
-														 root->group_pathkeys,
-														 -1.0);
-					else
-						path = (Path *) create_incremental_sort_path(root,
-																	 grouped_rel,
-																	 path,
-																	 root->group_pathkeys,
-																	 presorted_keys,
-																	 -1.0);
-				}
+				if (path == NULL)
+					continue;
 
 				if (parse->hasAggs)
 					add_path(grouped_rel, (Path *)
@@ -7200,44 +7191,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
 
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_total_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_total_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_path(partially_grouped_rel, (Path *)
@@ -7268,45 +7230,15 @@ create_partial_grouping_paths(PlannerInfo *root,
 		foreach(lc, input_rel->partial_pathlist)
 		{
 			Path	   *path = (Path *) lfirst(lc);
-			bool		is_sorted;
-			int			presorted_keys;
-
-			is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
-													path->pathkeys,
-													&presorted_keys);
 
-			if (!is_sorted)
-			{
-				/*
-				 * Try at least sorting the cheapest path and also try
-				 * incrementally sorting any path which is partially sorted
-				 * already (no need to deal with paths which have presorted
-				 * keys when incremental sort is disabled unless it's the
-				 * cheapest input path).
-				 */
-				if (path != cheapest_partial_path &&
-					(presorted_keys == 0 || !enable_incremental_sort))
-					continue;
+			path = make_ordered_path(root,
+									 partially_grouped_rel,
+									 path,
+									 cheapest_partial_path,
+									 root->group_pathkeys);
 
-				/*
-				 * We've no need to consider both a sort and incremental sort.
-				 * We'll just do a sort if there are no presorted keys and an
-				 * incremental sort when there are presorted keys.
-				 */
-				if (presorted_keys == 0 || !enable_incremental_sort)
-					path = (Path *) create_sort_path(root,
-													 partially_grouped_rel,
-													 path,
-													 root->group_pathkeys,
-													 -1.0);
-				else
-					path = (Path *) create_incremental_sort_path(root,
-																 partially_grouped_rel,
-																 path,
-																 root->group_pathkeys,
-																 presorted_keys,
-																 -1.0);
-			}
+			if (path == NULL)
+				continue;
 
 			if (parse->hasAggs)
 				add_partial_path(partially_grouped_rel, (Path *)
-- 
2.39.3 (Apple Git-145)
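The hunks above replace three nearly identical copies of the sort-vs-incremental-sort decision with a single `make_ordered_path()` call. As a hedged illustration only (a simplified stand-alone model reconstructed from the removed code, not the actual planner function; `Path`, `RelOptInfo`, and costing are deliberately elided), the consolidated decision looks roughly like this:

```c
#include <assert.h>
#include <stdbool.h>

/*
 * Hypothetical, simplified model of the decision make_ordered_path()
 * centralizes.  SKIP_PATH corresponds to the "continue" in the callers:
 * the input path is not worth ordering at all.
 */
typedef enum
{
	USE_AS_IS,			/* input already delivers the pathkeys */
	FULL_SORT,			/* stick a full Sort on top */
	INCREMENTAL_SORT,	/* stick an Incremental Sort on top */
	SKIP_PATH			/* ignore this input path entirely */
} OrderedChoice;

static OrderedChoice
choose_ordered_path(bool is_sorted, int presorted_keys,
					bool is_cheapest_input, bool enable_incremental_sort)
{
	if (is_sorted)
		return USE_AS_IS;

	/*
	 * Try at least sorting the cheapest input path, and also any path that
	 * is partially sorted already -- but paths with presorted keys matter
	 * only when incremental sort is enabled.
	 */
	if (!is_cheapest_input &&
		(presorted_keys == 0 || !enable_incremental_sort))
		return SKIP_PATH;

	/* No need to consider both: full sort or incremental sort, never both. */
	if (presorted_keys == 0 || !enable_incremental_sort)
		return FULL_SORT;

	return INCREMENTAL_SORT;
}
```

Each caller then only has to check for the "skip" outcome, which is what the `if (path == NULL) continue;` lines in the new code do.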

#127Nathan Bossart
nathandbossart@gmail.com
In reply to: Alexander Korotkov (#126)
Re: POC: GROUP BY optimization

A recent buildfarm failure [0] seems to indicate a name collision with the
"abc" index in the aggregates.sql test and the "abc" table in
namespace.sql.

[0]: https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=piculet&dt=2024-01-24%2014%3A05%3A14

--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

#128Alexander Korotkov
aekorotkov@gmail.com
In reply to: Nathan Bossart (#127)
Re: POC: GROUP BY optimization

On Wed, Jan 24, 2024 at 7:38 PM Nathan Bossart <nathandbossart@gmail.com> wrote:

A recent buildfarm failure [0] seems to indicate a name collision with the
"abc" index in the aggregates.sql test and the "abc" table in
namespace.sql.

[0] https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=piculet&dt=2024-01-24%2014%3A05%3A14

Thank you for catching this. Fixed.

------
Regards,
Alexander Korotkov

#129vignesh C
vignesh21@gmail.com
In reply to: Alexander Korotkov (#128)
Re: POC: GROUP BY optimization

On Thu, 25 Jan 2024 at 01:15, Alexander Korotkov <aekorotkov@gmail.com> wrote:

On Wed, Jan 24, 2024 at 7:38 PM Nathan Bossart <nathandbossart@gmail.com> wrote:

A recent buildfarm failure [0] seems to indicate a name collision with the
"abc" index in the aggregates.sql test and the "abc" table in
namespace.sql.

[0] https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=piculet&dt=2024-01-24%2014%3A05%3A14

Thank you for catching this. Fixed.

Since the patch has been committed, I have marked this entry as committed.

Regards,
Vignesh

#130Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#101)
Re: POC: GROUP BY optimization

On Tue, Dec 26, 2023 at 10:23 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think it's a fool's errand to even try to separate different sort
column orderings by cost. We simply do not have sufficiently accurate
cost information. The previous patch in this thread got reverted because
of that (well, also some implementation issues, but mostly that), and
nothing has happened to make me think that another try will fare any
better.

I'm late to the party, but I'd like to better understand what's being
argued here. If you're saying that, for some particular planner
problem, we should prefer a solution that doesn't need to know about
the relative cost of various sorts over one that does, I agree, for
exactly the reason that you state: our knowledge of sort costs won't
be reliable, and we will make mistakes. That's true in lots of
situations, not just related to sorts,
because estimation is a hard problem. Heuristics not based on cost are
going to be, in many cases, more accurate than heuristics based on
cost. They're also often cheaper, since they often let us reject
possible approaches very early, without all the bother of a cost
comparison.

But if you're saying that it's utterly impossible to know whether
sorting text will be cheaper or more expensive than sorting 4-byte
integers, and that if a particular problem can be solved only by
knowing which one is cheaper we should just give up, then I disagree.
In the absence of any other information, it must be right, at the very
least, to bank on varlena data types being more expensive to sort than
fixed-length data types. How much more expensive is hard to know,
because toasted blobs are going to be more expensive to sort than
short varlenas. But even before you reach the comparison function, a
pass-by-value datum has a significantly lower access cost than a
pass-by-reference datum. The fact that the pass-by-reference value
might be huge only compounds the problem.
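The access-cost asymmetry described here can be illustrated with a toy C comparison (a sketch of the general pass-by-value vs. pass-by-reference difference at the C level, not PostgreSQL's `Datum` or comparator machinery): the integer comparator reads only the array element itself, while the text comparator must chase a pointer and then walk the referenced bytes.

```c
#include <stdlib.h>
#include <string.h>

/* Pass-by-value key: one direct read per operand, one machine compare. */
static int
cmp_int(const void *a, const void *b)
{
	int			ia = *(const int *) a;
	int			ib = *(const int *) b;

	return (ia > ib) - (ia < ib);
}

/*
 * Pass-by-reference key: each element is a pointer to the actual datum,
 * so every comparison pays an extra indirection plus a byte-wise walk
 * whose cost grows with the length of the strings.
 */
static int
cmp_text(const void *a, const void *b)
{
	return strcmp(*(const char *const *) a, *(const char *const *) b);
}
```

Both comparators produce a correct sort order under `qsort()`; the point is only that the per-comparison work differs, and that difference is invisible when every comparison function carries the same cost estimate.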

--
Robert Haas
EDB: http://www.enterprisedb.com

#131Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#130)
Re: POC: GROUP BY optimization

Robert Haas <robertmhaas@gmail.com> writes:

On Tue, Dec 26, 2023 at 10:23 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

I think it's a fool's errand to even try to separate different sort
column orderings by cost. We simply do not have sufficiently accurate
cost information. The previous patch in this thread got reverted because
of that (well, also some implementation issues, but mostly that), and
nothing has happened to make me think that another try will fare any
better.

I'm late to the party, but I'd like to better understand what's being
argued here.

What I am saying is that we don't have sufficiently accurate cost
information to support the sort of logic that got committed and
reverted before. I did not mean to imply that it's not possible
to have such info, only that it is not present today. IOW, what
I'm saying is that if you want to write code that tries to make
a cost-based preference of one sorting over another, you *first*
need to put in a bunch of legwork to create more accurate cost
numbers. Trying to make such logic depend on the numbers we have
today is just going to result in garbage in, garbage out.

Sadly, that's not a small task:

* We'd need to put effort into assigning more realistic procost
values --- preferably across the board, not just comparison functions.
As long as all the comparison functions have procost 1.0, you're
just flying blind.

* As you mentioned, there'd need to be some accounting for the
likely size of varlena inputs, and especially whether they might
be toasted.

* cost_sort knows nothing of the low-level sort algorithm improvements
we've made in recent years, such as abbreviated keys.

That's a lot of work, and I think it has to be done before we try
to build infrastructure on top, not afterwards.
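For readers unfamiliar with the abbreviated-keys point: the idea is to pack a prefix of each string into a fixed-width integer so that most comparisons are a single machine compare, falling back to a full string comparison only on ties. The following is a hedged, simplified sketch of that technique (not PostgreSQL's sortsupport implementation):

```c
#include <stdint.h>
#include <string.h>

/*
 * Pack the first 8 bytes of s big-endian into a uint64_t.  Unsigned
 * byte order means the integer ordering of abbreviations agrees with
 * strcmp() ordering of the corresponding prefixes; shorter strings
 * pad with zero bytes and therefore sort first, also matching strcmp().
 */
static uint64_t
abbrev_key(const char *s)
{
	uint64_t	k = 0;

	for (int i = 0; i < 8 && s[i] != '\0'; i++)
		k |= ((uint64_t) (unsigned char) s[i]) << (56 - 8 * i);
	return k;
}

static int
cmp_abbrev(const char *a, const char *b)
{
	uint64_t	ka = abbrev_key(a);
	uint64_t	kb = abbrev_key(b);

	if (ka != kb)
		return (ka > kb) - (ka < kb);	/* cheap path: one integer compare */
	return strcmp(a, b);				/* authoritative tiebreak */
}
```

A cost model that charges every string comparison the full `strcmp()` price misses that, with abbreviation, only ties on the first bytes pay it.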

regards, tom lane

#132Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#131)
Re: POC: GROUP BY optimization

On Fri, Jan 26, 2024 at 10:38 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Sadly, that's not a small task:

* We'd need to put effort into assigning more realistic procost
values --- preferably across the board, not just comparison functions.
As long as all the comparison functions have procost 1.0, you're
just flying blind.

* As you mentioned, there'd need to be some accounting for the
likely size of varlena inputs, and especially whether they might
be toasted.

* cost_sort knows nothing of the low-level sort algorithm improvements
we've made in recent years, such as abbreviated keys.

That's a lot of work, and I think it has to be done before we try
to build infrastructure on top, not afterwards.

OK, that makes sense to me. Thanks.

--
Robert Haas
EDB: http://www.enterprisedb.com

#133Tom Lane
tgl@sss.pgh.pa.us
In reply to: Alexander Korotkov (#126)
Re: POC: GROUP BY optimization

Alexander Korotkov <aekorotkov@gmail.com> writes:

I'm going to push this if there are no objections.

One of the test cases added by this commit has not been very
stable in the buildfarm. Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class. You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

regards, tom lane

#134Richard Guo
guofenglinux@gmail.com
In reply to: Tom Lane (#133)
Re: POC: GROUP BY optimization

On Fri, Feb 2, 2024 at 10:02 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Alexander Korotkov <aekorotkov@gmail.com> writes:

I'm going to push this if there are no objections.

One of the test cases added by this commit has not been very
stable in the buildfarm. Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class. You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

Yeah, it's not a good practice to use pg_class in this place. While
looking through the test cases added by this commit, I noticed some
other minor issues that are not great. Such as

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test. I don't think we need such a big table to test
what we want.

* I don't see why we need to manipulate GUC max_parallel_workers and
max_parallel_workers_per_gather.

* I think we'd better write the tests with the keywords being all upper
or all lower. A mixed use of upper and lower is not great. Such as in

explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;

* Some comments for the test queries are not easy to read.

* For this statement

CREATE INDEX idx_y_x_z ON btg(y,x,w);

I think the index name would cause confusion. It creates an index on
columns y, x and w, but the name indicates an index on y, x and z.

I'd like to write a draft patch for the fixes.

Thanks
Richard

#135Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#133)
1 attachment(s)
Re: POC: GROUP BY optimization

On 2/2/2024 09:02, Tom Lane wrote:

Alexander Korotkov <aekorotkov@gmail.com> writes:

I'm going to push this if there are no objections.

One of the test cases added by this commit has not been very
stable in the buildfarm. Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class. You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

Yeah, it is my fault. Please see the attached patch fixing that.
--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

0001-Replace-the-GROUP-BY-optimization-test-with-the-same.patchtext/plain; charset=UTF-8; name=0001-Replace-the-GROUP-BY-optimization-test-with-the-same.patchDownload
From 11a049d95ee48e38ad569aab7663d8de91f946ad Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Fri, 2 Feb 2024 10:39:55 +0700
Subject: [PATCH] Replace the GROUP-BY optimization test with the same based on
 something less volatile than the pg_class relation.

---
 src/test/regress/expected/aggregates.out | 32 +++++++++++-------------
 src/test/regress/sql/aggregates.sql      |  9 +++----
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 7a73c19314..c2e1b8c9ed 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2873,7 +2873,6 @@ SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
 (6 rows)
 
 RESET enable_incremental_sort;
-DROP TABLE btg;
 -- The case, when scanning sort order correspond to aggregate sort order but
 -- can not be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
@@ -2901,32 +2900,31 @@ DROP TABLE agg_sort_order CASCADE;
 SET enable_hashjoin = off;
 SET enable_nestloop = off;
 explain (COSTS OFF)
-SELECT c1.relname,c1.relpages
-FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
-GROUP BY c1.reltuples,c1.relpages,c1.relname
-ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
-                                         QUERY PLAN                                          
----------------------------------------------------------------------------------------------
+SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
+GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
  Incremental Sort
-   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
-   Presorted Key: c1.relpages, c1.relname
+   Sort Key: b1.z, b1.w, ((b1.x * b1.x))
+   Presorted Key: b1.z, b1.w
    ->  Group
-         Group Key: c1.relpages, c1.relname, c1.reltuples
+         Group Key: b1.z, b1.w, b1.x
          ->  Incremental Sort
-               Sort Key: c1.relpages, c1.relname, c1.reltuples
-               Presorted Key: c1.relpages, c1.relname
+               Sort Key: b1.z, b1.w, b1.x
+               Presorted Key: b1.z, b1.w
                ->  Merge Join
-                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
+                     Merge Cond: ((b1.z = b2.z) AND (b1.w = b2.w))
                      ->  Sort
-                           Sort Key: c1.relpages, c1.relname
-                           ->  Seq Scan on pg_class c1
+                           Sort Key: b1.z, b1.w
+                           ->  Seq Scan on btg b1
                      ->  Sort
-                           Sort Key: c2.relpages, c2.relname
-                           ->  Seq Scan on pg_class c2
+                           Sort Key: b2.z, b2.w
+                           ->  Seq Scan on btg b2
 (16 rows)
 
 RESET enable_hashjoin;
 RESET enable_nestloop;
+DROP TABLE btg;
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 916dbf908f..3548fbb8db 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1229,8 +1229,6 @@ EXPLAIN (VERBOSE, COSTS OFF)
 SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
 RESET enable_incremental_sort;
 
-DROP TABLE btg;
-
 -- The case, when scanning sort order correspond to aggregate sort order but
 -- can not be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
@@ -1245,13 +1243,12 @@ DROP TABLE agg_sort_order CASCADE;
 SET enable_hashjoin = off;
 SET enable_nestloop = off;
 explain (COSTS OFF)
-SELECT c1.relname,c1.relpages
-FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
-GROUP BY c1.reltuples,c1.relpages,c1.relname
-ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
+SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
+GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
 RESET enable_hashjoin;
 RESET enable_nestloop;
 
+DROP TABLE btg;
 RESET enable_hashagg;
 RESET max_parallel_workers;
 RESET max_parallel_workers_per_gather;
-- 
2.43.0

#136Richard Guo
guofenglinux@gmail.com
In reply to: Richard Guo (#134)
1 attachment(s)
Re: POC: GROUP BY optimization

On Fri, Feb 2, 2024 at 11:32 AM Richard Guo <guofenglinux@gmail.com> wrote:

On Fri, Feb 2, 2024 at 10:02 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

One of the test cases added by this commit has not been very
stable in the buildfarm. Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class. You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

Yeah, it's not a good practice to use pg_class in this place. While
looking through the test cases added by this commit, I noticed some
other minor issues that are not great. Such as

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test. I don't think we need such a big table to test
what we want.

* I don't see why we need to manipulate GUC max_parallel_workers and
max_parallel_workers_per_gather.

* I think we'd better write the tests with the keywords being all upper
or all lower. A mixed use of upper and lower is not great. Such as in

explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;

* Some comments for the test queries are not easy to read.

* For this statement

CREATE INDEX idx_y_x_z ON btg(y,x,w);

I think the index name would cause confusion. It creates an index on
columns y, x and w, but the name indicates an index on y, x and z.

I'd like to write a draft patch for the fixes.

Here is the draft patch that fixes the issues I complained about in
upthread.

Thanks
Richard

Attachments:

v1-0001-Multiple-revises-for-the-GROUP-BY-reordering-tests.patchapplication/octet-stream; name=v1-0001-Multiple-revises-for-the-GROUP-BY-reordering-tests.patchDownload
From f14c3ae49042608aa43f01f7b95d738e793b4418 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Fri, 2 Feb 2024 12:01:10 +0800
Subject: [PATCH v1] Multiple revises for the GROUP BY reordering tests

---
 src/test/regress/expected/aggregates.out | 200 ++++++++---------------
 src/test/regress/sql/aggregates.sql      |  92 +++++------
 2 files changed, 113 insertions(+), 179 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 7a73c19314..9fb68fc280 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,29 +2728,20 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1,1000) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
--- GROUP BY optimization by reorder columns by frequency
 SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: x, y
-   ->  Index Only Scan using btg_x_y_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+SET enable_seqscan=off;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
                    QUERY PLAN                   
 ------------------------------------------------
  GroupAggregate
@@ -2759,21 +2750,11 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
 (3 rows)
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, z, w
-   ->  Incremental Sort
-         Sort Key: x, y, z, w
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
+ GroupAggregate
    Group Key: x, y, z, w
    ->  Incremental Sort
          Sort Key: x, y, z, w
@@ -2781,35 +2762,13 @@ explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
+ GroupAggregate
    Group Key: btg.x, btg.y, btg.w, btg.z
    ->  Incremental Sort
          Sort Key: btg.x, btg.y, btg.w, btg.z
@@ -2817,38 +2776,52 @@ GROUP BY (w,x,z,y);
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
-                      QUERY PLAN                       
--------------------------------------------------------
- Group
-   Group Key: btg.x, btg.y, btg.w, btg.z
-   ->  Limit
-         ->  Incremental Sort
-               Sort Key: btg.x, btg.y, btg.w, btg.z
-               Presorted Key: btg.x, btg.y
-               ->  Index Scan using btg_x_y_idx on btg
-(7 rows)
+-- Utilize the ordering of merge join to avoid a full-Sort operation
+SET enable_hashjoin=off;
+SET enable_nestloop=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: t1.z, t1.w, t1.x, t1.y
+   ->  Incremental Sort
+         Sort Key: t1.z, t1.w, t1.x, t1.y
+         Presorted Key: t1.z, t1.w, t1.x
+         ->  Merge Join
+               Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
+               ->  Sort
+                     Sort Key: t1.z, t1.w, t1.x
+                     ->  Index Scan using btg_x_y_idx on btg t1
+               ->  Sort
+                     Sort Key: t2.z, t2.w, t2.x
+                     ->  Index Scan using btg_x_y_idx on btg t2
+(13 rows)
 
+RESET enable_nestloop;
+RESET enable_hashjoin;
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
-          QUERY PLAN          
-------------------------------
- Group
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
    Group Key: y, x, z, w
    ->  Sort
          Sort Key: y, x, z, w
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_x_y_idx on btg
 (5 rows)
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
                       QUERY PLAN                       
 -------------------------------------------------------
  Sort
    Sort Key: ((x * x)), z
-   ->  Group
+   ->  GroupAggregate
          Group Key: x, y, w, z
          ->  Incremental Sort
                Sort Key: x, y, w, z
@@ -2856,80 +2829,47 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
                ->  Index Scan using btg_x_y_idx on btg
 (8 rows)
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-                     QUERY PLAN                      
------------------------------------------------------
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
+                       QUERY PLAN                        
+---------------------------------------------------------
  GroupAggregate
    Output: y, x, array_agg(DISTINCT w)
    Group Key: btg.y, btg.x
-   ->  Index Only Scan using idx_y_x_z on public.btg
+   ->  Index Only Scan using btg_y_x_w_idx on public.btg
          Output: y, x, w
          Index Cond: (btg.y < 0)
 (6 rows)
 
-RESET enable_incremental_sort;
 DROP TABLE btg;
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the ordering of the scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
-                             QUERY PLAN                             
---------------------------------------------------------------------
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
  Sort
    Sort Key: c2
    ->  GroupAggregate
          Group Key: c1
          ->  Sort
                Sort Key: c1, c2
-               ->  Bitmap Heap Scan on agg_sort_order
-                     Recheck Cond: (c2 < 100)
-                     ->  Bitmap Index Scan on agg_sort_order_c2_idx
-                           Index Cond: (c2 < 100)
-(10 rows)
+               ->  Index Scan using agg_sort_order_c2_idx on agg_sort_order
+                     Index Cond: (c2 < 100)
+(8 rows)
 
 DROP TABLE agg_sort_order CASCADE;
--- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
-SET enable_hashjoin = off;
-SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT c1.relname,c1.relpages
-FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
-GROUP BY c1.reltuples,c1.relpages,c1.relname
-ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
-                                         QUERY PLAN                                          
----------------------------------------------------------------------------------------------
- Incremental Sort
-   Sort Key: c1.relpages, c1.relname, ((c1.relpages * c1.relpages))
-   Presorted Key: c1.relpages, c1.relname
-   ->  Group
-         Group Key: c1.relpages, c1.relname, c1.reltuples
-         ->  Incremental Sort
-               Sort Key: c1.relpages, c1.relname, c1.reltuples
-               Presorted Key: c1.relpages, c1.relname
-               ->  Merge Join
-                     Merge Cond: ((c1.relpages = c2.relpages) AND (c1.relname = c2.relname))
-                     ->  Sort
-                           Sort Key: c1.relpages, c1.relname
-                           ->  Seq Scan on pg_class c1
-                     ->  Sort
-                           Sort Key: c2.relpages, c2.relname
-                           ->  Seq Scan on pg_class c2
-(16 rows)
-
-RESET enable_hashjoin;
-RESET enable_nestloop;
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 916dbf908f..77cddad4fc 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,80 +1181,74 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1,1000) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
 
--- GROUP BY optimization by reorder columns by frequency
-
 SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
+SET enable_seqscan=off;
 
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
+
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
+
+-- Utilize the ordering of merge join to avoid a full-Sort operation
+SET enable_hashjoin=off;
+SET enable_nestloop=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+RESET enable_nestloop;
+RESET enable_hashjoin;
 
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-RESET enable_incremental_sort;
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
 
 DROP TABLE btg;
 
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the ordering of scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
-DROP TABLE agg_sort_order CASCADE;
 
--- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
-SET enable_hashjoin = off;
-SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT c1.relname,c1.relpages
-FROM pg_class c1 JOIN pg_class c2 ON (c1.relname=c2.relname AND c1.relpages=c2.relpages)
-GROUP BY c1.reltuples,c1.relpages,c1.relname
-ORDER BY c1.relpages, c1.relname, c1.relpages*c1.relpages;
-RESET enable_hashjoin;
-RESET enable_nestloop;
+DROP TABLE agg_sort_order CASCADE;
 
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
-- 
2.31.0

#137 Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Richard Guo (#136)
Re: POC: GROUP BY optimization

On 2/2/2024 11:06, Richard Guo wrote:

On Fri, Feb 2, 2024 at 11:32 AM Richard Guo <guofenglinux@gmail.com
<mailto:guofenglinux@gmail.com>> wrote:

On Fri, Feb 2, 2024 at 10:02 AM Tom Lane <tgl@sss.pgh.pa.us
<mailto:tgl@sss.pgh.pa.us>> wrote:

One of the test cases added by this commit has not been very
stable in the buildfarm.  Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class.  You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

Yeah, it's not a good practice to use pg_class in this place.  While
looking through the test cases added by this commit, I noticed some
other minor issues that are not great.  Such as

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test.  I don't think we need such a big table to test
what we want.

* I don't see why we need to manipulate GUC max_parallel_workers and
max_parallel_workers_per_gather.

* I think we'd better write the tests with the keywords being all upper
or all lower.  A mixed use of upper and lower is not great. Such as in

    explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;

* Some comments for the test queries are not easy to read.

* For this statement

    CREATE INDEX idx_y_x_z ON btg(y,x,w);

I think the index name would cause confusion.  It creates an index on
columns y, x and w, but the name indicates an index on y, x and z.

I'd like to write a draft patch for the fixes.

Here is the draft patch that fixes the issues I complained about in
upthread.

I looked through the patch; it doesn't seem to break anything. Why do
you prefer to use count(*) in EXPLAIN instead of a plain targetlist, like
"SELECT x,y,..."?
Also, regarding the test mentioned by Tom:
1. I see that PG uses an IndexScan on (x,y), so column x will already be
sorted before the MergeJoin. Why not use an Incremental Sort on (x,z,w)
instead of a full sort?
2. As a side note, IMO this test hints at the near-term potential of this
feature: it is cheaper to use grouping order (w,z) instead of (z,w).
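To make point 2 concrete, here is an illustrative pair of queries against the btg table from the tests upthread (not part of any patch); comparing their costed plans shows whether the planner prefers one grouping order over the other:

```sql
-- Illustrative only: force a sorted-aggregate plan and compare the two
-- grouping orders.  With the btg test data, w has many distinct values
-- while z has only 10, so sorting with w first rarely needs to compare z
-- and grouping by (w, z) may sort cheaper than (z, w).
SET enable_hashagg = off;
EXPLAIN SELECT count(*) FROM btg GROUP BY z, w;
EXPLAIN SELECT count(*) FROM btg GROUP BY w, z;
RESET enable_hashagg;
```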

--
regards,
Andrei Lepikhov
Postgres Professional

#138 Richard Guo
guofenglinux@gmail.com
In reply to: Andrei Lepikhov (#137)
Re: POC: GROUP BY optimization

On Fri, Feb 2, 2024 at 12:40 PM Andrei Lepikhov <a.lepikhov@postgrespro.ru>
wrote:

On 2/2/2024 11:06, Richard Guo wrote:

On Fri, Feb 2, 2024 at 11:32 AM Richard Guo <guofenglinux@gmail.com
<mailto:guofenglinux@gmail.com>> wrote:

On Fri, Feb 2, 2024 at 10:02 AM Tom Lane <tgl@sss.pgh.pa.us
<mailto:tgl@sss.pgh.pa.us>> wrote:

One of the test cases added by this commit has not been very
stable in the buildfarm. Latest example is here:

https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=prion&dt=2024-02-01%2021%3A28%3A04

and I've seen similar failures intermittently on other machines.

I'd suggest building this test atop a table that is more stable
than pg_class. You're just waving a red flag in front of a bull
if you expect stable statistics from that during a regression run.
Nor do I see any particular reason for pg_class to be especially
suited to the test.

Yeah, it's not a good practice to use pg_class in this place. While
looking through the test cases added by this commit, I noticed some
other minor issues that are not great. Such as

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test. I don't think we need such a big table to test
what we want.

* I don't see why we need to manipulate GUC max_parallel_workers and
max_parallel_workers_per_gather.

* I think we'd better write the tests with the keywords being all upper
or all lower. A mixed use of upper and lower is not great. Such as in

    explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;

* Some comments for the test queries are not easy to read.

* For this statement

CREATE INDEX idx_y_x_z ON btg(y,x,w);

I think the index name would cause confusion. It creates an index on
columns y, x and w, but the name indicates an index on y, x and z.

I'd like to write a draft patch for the fixes.

Here is the draft patch that fixes the issues I complained about in
upthread.

I looked through the patch; it doesn't seem to break anything. Why do
you prefer to use count(*) in EXPLAIN instead of a plain targetlist, like
"SELECT x,y,..."?

Nothing special. Just making the test cases as consistent as
possible.

Also, regarding the test mentioned by Tom:
1. I see that PG uses an IndexScan on (x,y), so column x will already be
sorted before the MergeJoin. Why not use an Incremental Sort on (x,z,w)
instead of a full sort?

I think that's because the planner chooses to use (z, w, x) to perform
the mergejoin. I did not delve into the details, but I guess the cost
estimation decides this is cheaper.
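One way to check this (illustrative; the query below is the one from the patch upthread) is to re-run the EXPLAIN with costs enabled and compare the Sort and Merge Join estimates:

```sql
-- Illustrative: show the cost estimates to see why the planner picked
-- (z, w, x) as the merge order for this join from the tests upthread.
EXPLAIN (COSTS ON)
SELECT count(*)
  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
  GROUP BY t1.x, t1.y, t1.z, t1.w;
```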

Hi Alexander,

What do you think about the revisions for the test cases?

Thanks
Richard

#139 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Richard Guo (#138)
1 attachment(s)
Re: POC: GROUP BY optimization

Hi, Richard!

What do you think about the revisions for the test cases?

I've rebased your patch upthread. Did some minor beautifications.

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test. I don't think we need such a big table to test
what we want.

Your patch reduces the number of rows to 1000 tuples. I found it
possible to further reduce it to 100 tuples. That also allowed me to
save the plan in the test case introduced by e1b7fde418.

Please check if you're OK with the patch attached.

------
Regards,
Alexander Korotkov

Attachments:

0001-Multiple-revises-for-the-GROUP-BY-reordering-test-v2.patch (application/octet-stream)
From 3da12defe67efcb10ca719bd0bd64e2acf6b37fa Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Wed, 21 Feb 2024 11:43:00 +0200
Subject: [PATCH] Multiple revises for the GROUP BY reordering tests

Reported-by: Richard Guo
Discussion: https://postgr.es/m/CAMbWs4_NF0stupM8guasC_yCJxdTvRqm3a%2BJLarjvAxcH314TQ%40mail.gmail.com
---
 src/test/regress/expected/aggregates.out | 185 ++++++++++-------------
 src/test/regress/sql/aggregates.sql      |  90 ++++++-----
 2 files changed, 126 insertions(+), 149 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index dba6f230014..12460bcb7f5 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,29 +2728,20 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1, 100) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
--- GROUP BY optimization by reorder columns by frequency
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: x, y
-   ->  Index Only Scan using btg_x_y_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+SET enable_hashagg = off;
+SET enable_seqscan = off;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
                    QUERY PLAN                   
 ------------------------------------------------
  GroupAggregate
@@ -2759,21 +2750,11 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
 (3 rows)
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, z, w
-   ->  Incremental Sort
-         Sort Key: x, y, z, w
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
+ GroupAggregate
    Group Key: x, y, z, w
    ->  Incremental Sort
          Sort Key: x, y, z, w
@@ -2781,35 +2762,13 @@ explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
+ GroupAggregate
    Group Key: btg.x, btg.y, btg.w, btg.z
    ->  Incremental Sort
          Sort Key: btg.x, btg.y, btg.w, btg.z
@@ -2817,38 +2776,52 @@ GROUP BY (w,x,z,y);
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
-                      QUERY PLAN                       
--------------------------------------------------------
- Group
-   Group Key: btg.x, btg.y, btg.w, btg.z
-   ->  Limit
-         ->  Incremental Sort
-               Sort Key: btg.x, btg.y, btg.w, btg.z
-               Presorted Key: btg.x, btg.y
-               ->  Index Scan using btg_x_y_idx on btg
-(7 rows)
+-- Utilize the ordering of merge join to avoid a full Sort operation
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: t1.z, t1.w, t1.x, t1.y
+   ->  Incremental Sort
+         Sort Key: t1.z, t1.w, t1.x, t1.y
+         Presorted Key: t1.z, t1.w, t1.x
+         ->  Merge Join
+               Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
+               ->  Sort
+                     Sort Key: t1.z, t1.w, t1.x
+                     ->  Index Scan using btg_x_y_idx on btg t1
+               ->  Sort
+                     Sort Key: t2.z, t2.w, t2.x
+                     ->  Index Scan using btg_x_y_idx on btg t2
+(13 rows)
 
+RESET enable_nestloop;
+RESET enable_hashjoin;
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
-          QUERY PLAN          
-------------------------------
- Group
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
    Group Key: y, x, z, w
    ->  Sort
          Sort Key: y, x, z, w
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_x_y_idx on btg
 (5 rows)
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
                       QUERY PLAN                       
 -------------------------------------------------------
  Sort
    Sort Key: ((x * x)), z
-   ->  Group
+   ->  GroupAggregate
          Group Key: x, y, w, z
          ->  Incremental Sort
                Sort Key: x, y, w, z
@@ -2856,23 +2829,22 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
                ->  Index Scan using btg_x_y_idx on btg
 (8 rows)
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-                     QUERY PLAN                      
------------------------------------------------------
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
+                       QUERY PLAN                        
+---------------------------------------------------------
  GroupAggregate
    Output: y, x, array_agg(DISTINCT w)
    Group Key: btg.y, btg.x
-   ->  Index Only Scan using idx_y_x_z on public.btg
+   ->  Index Only Scan using btg_y_x_w_idx on public.btg
          Output: y, x, w
          Index Cond: (btg.y < 0)
 (6 rows)
 
-RESET enable_incremental_sort;
 -- Check we don't pick aggregate path key instead of grouping path key
 CREATE TABLE group_agg_pk AS SELECT
   i % 10 AS x,
@@ -2897,37 +2869,37 @@ GROUP BY c1.w, c1.z;
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the ordering of scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
-                             QUERY PLAN                             
---------------------------------------------------------------------
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
  Sort
    Sort Key: c2
    ->  GroupAggregate
          Group Key: c1
          ->  Sort
                Sort Key: c1, c2
-               ->  Bitmap Heap Scan on agg_sort_order
-                     Recheck Cond: (c2 < 100)
-                     ->  Bitmap Index Scan on agg_sort_order_c2_idx
-                           Index Cond: (c2 < 100)
-(10 rows)
+               ->  Index Scan using agg_sort_order_c2_idx on agg_sort_order
+                     Index Cond: (c2 < 100)
+(8 rows)
 
 DROP TABLE agg_sort_order CASCADE;
 -- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
+-- by planner itself.  For example, by MergeJoin.
 SET enable_hashjoin = off;
 SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
-GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
-                            QUERY PLAN                             
--------------------------------------------------------------------
+EXPLAIN (COSTS OFF)
+SELECT b1.x, b1.w FROM btg b1 JOIN btg b2 ON (b1.z = b2.z AND b1.w = b2.w)
+GROUP BY b1.x, b1.z, b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
  Incremental Sort
    Sort Key: b1.z, b1.w, ((b1.x * b1.x))
    Presorted Key: b1.z, b1.w
@@ -2940,7 +2912,7 @@ GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
                      Merge Cond: ((b1.z = b2.z) AND (b1.w = b2.w))
                      ->  Sort
                            Sort Key: b1.z, b1.w
-                           ->  Seq Scan on btg b1
+                           ->  Index Scan using btg_x_y_idx on btg b1
                      ->  Sort
                            Sort Key: b2.z, b2.w
                            ->  Seq Scan on btg b2
@@ -2950,8 +2922,7 @@ RESET enable_hashjoin;
 RESET enable_nestloop;
 DROP TABLE btg;
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index d6ed5d0effa..3c75c2aaa65 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,53 +1181,56 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1, 100) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
 
--- GROUP BY optimization by reorder columns by frequency
-
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
+SET enable_hashagg = off;
+SET enable_seqscan = off;
 
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
+
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
+
+-- Utilize the ordering of merge join to avoid a full Sort operation
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+RESET enable_nestloop;
+RESET enable_hashjoin;
 
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-RESET enable_incremental_sort;
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
 
 -- Check we don't pick aggregate path key instead of grouping path key
 CREATE TABLE group_agg_pk AS SELECT
@@ -1248,29 +1251,32 @@ RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
 
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the ordering of scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
+
 DROP TABLE agg_sort_order CASCADE;
 
 -- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
+-- by planner itself.  For example, by MergeJoin.
 SET enable_hashjoin = off;
 SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
-GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
+EXPLAIN (COSTS OFF)
+SELECT b1.x, b1.w FROM btg b1 JOIN btg b2 ON (b1.z = b2.z AND b1.w = b2.w)
+GROUP BY b1.x, b1.z, b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
 RESET enable_hashjoin;
 RESET enable_nestloop;
 
 DROP TABLE btg;
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
-- 
2.39.3 (Apple Git-145)

#140 Maxim Orlov
orlovmg@gmail.com
In reply to: Alexander Korotkov (#139)
1 attachment(s)
Re: POC: GROUP BY optimization

Hi!

Another issue with the test introduced in 0452b461bc405: I think it may be
unstable in some circumstances, for example if we try to use a different
BLCKSZ. See, I've made a little change in the number of tuples to be
inserted:

$ git diff
diff --git a/src/test/regress/sql/aggregates.sql
b/src/test/regress/sql/aggregates.sql
index d6ed5d0eff..414078d4ec 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1187,7 +1187,7 @@ CREATE TABLE btg AS SELECT
   i % 100 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
+FROM generate_series(1,11900) AS i;
 CREATE INDEX btg_x_y_idx ON btg(x,y);
 ANALYZE btg;
And bulk extension kicks in, so we get zeroed pages in the relation. The
plan also changes, switching from an index scan to a seq scan:
@@ -2734,7 +2734,7 @@
   i % 100 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
+FROM generate_series(1,11900) AS i;
 CREATE INDEX btg_x_y_idx ON btg(x,y);
 ANALYZE btg;
 -- GROUP BY optimization by reorder columns by frequency
@@ -2760,62 +2760,57 @@
 -- Engage incremental sort
 explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-                   QUERY PLAN
--------------------------------------------------
+          QUERY PLAN
+------------------------------
  Group
    Group Key: x, y, z, w
-   ->  Incremental Sort
+   ->  Sort
          Sort Key: x, y, z, w
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
+         ->  Seq Scan on btg
+(5 rows)
... and so on.

So, my proposal is simple. I think we need not just "ANALYZE btg", but
"VACUUM ANALYZE btg", to get rid of zeroed pages in this particular
case. PFA corresponding patch.

--
Best regards,
Maxim Orlov.

Attachments:

0001-Force-VACUUM-to-avoid-zeroed-pages-from-bulk-extensi.patch (application/octet-stream)
From 4352e08c22ffc19d764e037967f164e8e58dba5d Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 21 Feb 2024 17:58:26 +0300
Subject: [PATCH] Force VACUUM to avoid zeroed pages from bulk extension mess
 with planning.

---
 src/test/regress/expected/aggregates.out | 2 +-
 src/test/regress/sql/aggregates.sql      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index dba6f23001..7f387e3023 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2736,7 +2736,7 @@ CREATE TABLE btg AS SELECT
   i AS w
 FROM generate_series(1,10000) AS i;
 CREATE INDEX btg_x_y_idx ON btg(x,y);
-ANALYZE btg;
+VACUUM ANALYZE btg;
 -- GROUP BY optimization by reorder columns by frequency
 SET enable_hashagg=off;
 SET max_parallel_workers= 0;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index d6ed5d0eff..31e73ec1f1 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1189,7 +1189,7 @@ CREATE TABLE btg AS SELECT
   i AS w
 FROM generate_series(1,10000) AS i;
 CREATE INDEX btg_x_y_idx ON btg(x,y);
-ANALYZE btg;
+VACUUM ANALYZE btg;
 
 -- GROUP BY optimization by reorder columns by frequency
 
-- 
2.43.0

#141 Richard Guo
guofenglinux@gmail.com
In reply to: Alexander Korotkov (#139)
1 attachment(s)
Re: POC: GROUP BY optimization

On Wed, Feb 21, 2024 at 6:20 PM Alexander Korotkov <aekorotkov@gmail.com>
wrote:

Hi, Richard!

What do you think about the revisions for the test cases?

I've rebased your patch upthread. Did some minor beautifications.

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test. I don't think we need such a big table to test
what we want.

Your patch reduces the number of rows to 1000 tuples. I found it
possible to further reduce it to 100 tuples. That also allowed me to
save the plan in the test case introduced by e1b7fde418.

Please check if you're OK with the patch attached.

I looked through the v2 patch and have two comments.

* The test case under "Check we don't pick aggregate path key instead of
grouping path key" does not have EXPLAIN to show the plan. So how can
we ensure we do not mistakenly select the aggregate pathkeys instead of
the grouping pathkeys?
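A minimal sketch of the kind of check meant here (illustrative only — the group_agg_pk table exists upthread, but the exact query shape of that test is assumed): wrapping the query in EXPLAIN makes the chosen Group Key part of the expected output, so a regression would become visible.

```sql
-- Illustrative sketch: the ordered aggregate creates aggregate pathkeys
-- (on c1.x per the ORDER BY) that must not be mistaken for the grouping
-- pathkeys (c1.w, c1.z); EXPLAIN exposes which keys were actually chosen.
EXPLAIN (COSTS OFF)
SELECT array_agg(c1.x ORDER BY c1.x)
  FROM group_agg_pk c1 GROUP BY c1.w, c1.z;
```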

* I don't think the test case introduced by e1b7fde418 is still needed,
because we already have one under "Utilize the ordering of merge join to
avoid a full Sort operation". This kind of test case is just to ensure
that we are able to utilize the ordering of the subplans underneath. So
it should be parallel to the test cases for utilizing the ordering of
index scans and subquery scans.

See the attached v3 patch. I also made cosmetic tweaks to the comments,
and simplified a test case query.

Thanks
Richard

Attachments:

v3-0001-Multiple-revisions-to-the-GROUP-BY-reordering-tests.patch (application/octet-stream)
From 13a383f16f9057ea8a96efe57221bb2458a456f0 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 22 Feb 2024 09:20:16 +0800
Subject: [PATCH v3] Multiple revisions to the GROUP BY reordering tests

Author: Richard Guo
---
 src/test/regress/expected/aggregates.out | 236 ++++++++++-------------
 src/test/regress/sql/aggregates.sql      | 107 +++++-----
 2 files changed, 154 insertions(+), 189 deletions(-)

diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index dba6f23001..f86cf8d258 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,29 +2728,20 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1, 100) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
--- GROUP BY optimization by reorder columns by frequency
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-                   QUERY PLAN                   
-------------------------------------------------
- GroupAggregate
-   Group Key: x, y
-   ->  Index Only Scan using btg_x_y_idx on btg
-(3 rows)
-
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+SET enable_hashagg = off;
+SET enable_seqscan = off;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
                    QUERY PLAN                   
 ------------------------------------------------
  GroupAggregate
@@ -2759,21 +2750,11 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
 (3 rows)
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
-   Group Key: x, y, z, w
-   ->  Incremental Sort
-         Sort Key: x, y, z, w
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
+ GroupAggregate
    Group Key: x, y, z, w
    ->  Incremental Sort
          Sort Key: x, y, z, w
@@ -2781,35 +2762,13 @@ explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-                   QUERY PLAN                    
--------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
                    QUERY PLAN                    
 -------------------------------------------------
- Group
-   Group Key: x, y, w, z
-   ->  Incremental Sort
-         Sort Key: x, y, w, z
-         Presorted Key: x, y
-         ->  Index Scan using btg_x_y_idx on btg
-(6 rows)
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
-                   QUERY PLAN                    
--------------------------------------------------
- Group
+ GroupAggregate
    Group Key: btg.x, btg.y, btg.w, btg.z
    ->  Incremental Sort
          Sort Key: btg.x, btg.y, btg.w, btg.z
@@ -2817,38 +2776,52 @@ GROUP BY (w,x,z,y);
          ->  Index Scan using btg_x_y_idx on btg
 (6 rows)
 
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
-                      QUERY PLAN                       
--------------------------------------------------------
- Group
-   Group Key: btg.x, btg.y, btg.w, btg.z
-   ->  Limit
-         ->  Incremental Sort
-               Sort Key: btg.x, btg.y, btg.w, btg.z
-               Presorted Key: btg.x, btg.y
-               ->  Index Scan using btg_x_y_idx on btg
-(7 rows)
+-- Utilize the ordering of merge join to avoid a full Sort operation
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ GroupAggregate
+   Group Key: t1.z, t1.w, t1.x, t1.y
+   ->  Incremental Sort
+         Sort Key: t1.z, t1.w, t1.x, t1.y
+         Presorted Key: t1.z, t1.w, t1.x
+         ->  Merge Join
+               Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
+               ->  Sort
+                     Sort Key: t1.z, t1.w, t1.x
+                     ->  Index Scan using btg_x_y_idx on btg t1
+               ->  Sort
+                     Sort Key: t2.z, t2.w, t2.x
+                     ->  Index Scan using btg_x_y_idx on btg t2
+(13 rows)
 
+RESET enable_nestloop;
+RESET enable_hashjoin;
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
-          QUERY PLAN          
-------------------------------
- Group
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
+                   QUERY PLAN                    
+-------------------------------------------------
+ GroupAggregate
    Group Key: y, x, z, w
    ->  Sort
          Sort Key: y, x, z, w
-         ->  Seq Scan on btg
+         ->  Index Scan using btg_x_y_idx on btg
 (5 rows)
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
                       QUERY PLAN                       
 -------------------------------------------------------
  Sort
    Sort Key: ((x * x)), z
-   ->  Group
+   ->  GroupAggregate
          Group Key: x, y, w, z
          ->  Incremental Sort
                Sort Key: x, y, w, z
@@ -2856,24 +2829,24 @@ explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
                ->  Index Scan using btg_x_y_idx on btg
 (8 rows)
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-                     QUERY PLAN                      
------------------------------------------------------
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
+                       QUERY PLAN                        
+---------------------------------------------------------
  GroupAggregate
    Output: y, x, array_agg(DISTINCT w)
    Group Key: btg.y, btg.x
-   ->  Index Only Scan using idx_y_x_z on public.btg
+   ->  Index Only Scan using btg_y_x_w_idx on public.btg
          Output: y, x, w
          Index Cond: (btg.y < 0)
 (6 rows)
 
-RESET enable_incremental_sort;
--- Check we don't pick aggregate path key instead of grouping path key
+-- Ensure that we do not select the aggregate pathkeys instead of the grouping
+-- pathkeys
 CREATE TABLE group_agg_pk AS SELECT
   i % 10 AS x,
   i % 2 AS y,
@@ -2884,74 +2857,63 @@ FROM generate_series(1,100) AS i;
 ANALYZE group_agg_pk;
 SET enable_nestloop = off;
 SET enable_hashjoin = off;
-SELECT
-  c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
-FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
+EXPLAIN (COSTS OFF)
+SELECT avg(c1.f ORDER BY c1.x, c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
- z | w | string_agg 
----+---+------------
- 0 | 2 | 
- 1 | 2 | 
+                     QUERY PLAN                      
+-----------------------------------------------------
+ GroupAggregate
+   Group Key: c1.w, c1.z
+   ->  Sort
+         Sort Key: c1.w, c1.z, c1.x, c1.y
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(12 rows)
+
+SELECT avg(c1.f ORDER BY c1.x, c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
+GROUP BY c1.w, c1.z;
+        avg         
+--------------------
+ 4.0000000000000000
+ 5.0000000000000000
 (2 rows)
 
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the the ordering of scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
-                             QUERY PLAN                             
---------------------------------------------------------------------
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
  Sort
    Sort Key: c2
    ->  GroupAggregate
          Group Key: c1
          ->  Sort
                Sort Key: c1, c2
-               ->  Bitmap Heap Scan on agg_sort_order
-                     Recheck Cond: (c2 < 100)
-                     ->  Bitmap Index Scan on agg_sort_order_c2_idx
-                           Index Cond: (c2 < 100)
-(10 rows)
+               ->  Index Scan using agg_sort_order_c2_idx on agg_sort_order
+                     Index Cond: (c2 < 100)
+(8 rows)
 
 DROP TABLE agg_sort_order CASCADE;
--- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
-SET enable_hashjoin = off;
-SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
-GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
-                            QUERY PLAN                             
--------------------------------------------------------------------
- Incremental Sort
-   Sort Key: b1.z, b1.w, ((b1.x * b1.x))
-   Presorted Key: b1.z, b1.w
-   ->  Group
-         Group Key: b1.z, b1.w, b1.x
-         ->  Incremental Sort
-               Sort Key: b1.z, b1.w, b1.x
-               Presorted Key: b1.z, b1.w
-               ->  Merge Join
-                     Merge Cond: ((b1.z = b2.z) AND (b1.w = b2.w))
-                     ->  Sort
-                           Sort Key: b1.z, b1.w
-                           ->  Seq Scan on btg b1
-                     ->  Sort
-                           Sort Key: b2.z, b2.w
-                           ->  Seq Scan on btg b2
-(16 rows)
-
-RESET enable_hashjoin;
-RESET enable_nestloop;
 DROP TABLE btg;
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index d6ed5d0eff..441e01d150 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,55 +1181,59 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
--- GROUP BY optimization by reorder columns
+-- GROUP BY optimization by reordering GROUP BY clauses
 CREATE TABLE btg AS SELECT
-  i % 100 AS x,
-  i % 100 AS y,
+  i % 10 AS x,
+  i % 10 AS y,
   'abc' || i % 10 AS z,
   i AS w
-FROM generate_series(1,10000) AS i;
-CREATE INDEX btg_x_y_idx ON btg(x,y);
+FROM generate_series(1, 100) AS i;
+CREATE INDEX btg_x_y_idx ON btg(x, y);
 ANALYZE btg;
 
--- GROUP BY optimization by reorder columns by frequency
-
-SET enable_hashagg=off;
-SET max_parallel_workers= 0;
-SET max_parallel_workers_per_gather = 0;
+SET enable_hashagg = off;
+SET enable_seqscan = off;
 
--- Utilize index scan ordering to avoid a Sort operation
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
-EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+-- Utilize the ordering of index scan to avoid a Sort operation
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY y, x;
 
 -- Engage incremental sort
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
-
--- Subqueries
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
-GROUP BY (w,x,z,y);
-explain (COSTS OFF) SELECT x,y
-FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
-GROUP BY (w,x,z,y);
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY z, y, w, x;
+
+-- Utilize the ordering of subquery scan to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*)
+FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
+GROUP BY w, x, z, y;
+
+-- Utilize the ordering of merge join to avoid a full Sort operation
+SET enable_hashjoin = off;
+SET enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*)
+  FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
+  GROUP BY t1.x, t1.y, t1.z, t1.w;
+RESET enable_nestloop;
+RESET enable_hashjoin;
 
 -- Should work with and without GROUP-BY optimization
-explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
 
 -- Utilize incremental sort to make the ORDER BY rule a bit cheaper
-explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
 
-SET enable_incremental_sort = off;
--- The case when the number of incoming subtree path keys is more than
+-- Test the case where the number of incoming subtree path keys is more than
 -- the number of grouping keys.
-CREATE INDEX idx_y_x_z ON btg(y,x,w);
+CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
-RESET enable_incremental_sort;
+SELECT y, x, array_agg(distinct w)
+  FROM btg WHERE y < 0 GROUP BY x, y;
 
--- Check we don't pick aggregate path key instead of grouping path key
+-- Ensure that we do not select the aggregate pathkeys instead of the grouping
+-- pathkeys
 CREATE TABLE group_agg_pk AS SELECT
   i % 10 AS x,
   i % 2 AS y,
@@ -1240,37 +1244,36 @@ FROM generate_series(1,100) AS i;
 ANALYZE group_agg_pk;
 SET enable_nestloop = off;
 SET enable_hashjoin = off;
-SELECT
-  c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
-FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
+
+EXPLAIN (COSTS OFF)
+SELECT avg(c1.f ORDER BY c1.x, c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
+GROUP BY c1.w, c1.z;
+SELECT avg(c1.f ORDER BY c1.x, c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
 
--- The case, when scanning sort order correspond to aggregate sort order but
--- can not be found in the group-by list
+-- Test the case where the the ordering of scan matches the ordering within the
+-- aggregate but cannot be found in the group-by list
 CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
-CREATE UNIQUE INDEX ON agg_sort_order(c2);
-explain (costs off)
+CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
+INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
+ANALYZE agg_sort_order;
+
+EXPLAIN (COSTS OFF)
 SELECT array_agg(c1 ORDER BY c2),c2
 FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
-DROP TABLE agg_sort_order CASCADE;
 
--- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
--- by planner itself. For example, by MergeJoin.
-SET enable_hashjoin = off;
-SET enable_nestloop = off;
-explain (COSTS OFF)
-SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
-GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
-RESET enable_hashjoin;
-RESET enable_nestloop;
+DROP TABLE agg_sort_order CASCADE;
 
 DROP TABLE btg;
+
 RESET enable_hashagg;
-RESET max_parallel_workers;
-RESET max_parallel_workers_per_gather;
+RESET enable_seqscan;
 
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
-- 
2.31.0

#142Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Richard Guo (#141)
Re: POC: GROUP BY optimization

On 22/2/2024 09:09, Richard Guo wrote:

On Wed, Feb 21, 2024 at 6:20 PM Alexander Korotkov <aekorotkov@gmail.com
<mailto:aekorotkov@gmail.com>> wrote:

Hi, Richard!

What do you think about the revisions for the test cases?

I've rebased your patch upthread.  Did some minor beautifications.

* The table 'btg' is inserted with 10000 tuples, which seems a bit
expensive for a test.  I don't think we need such a big table to test
what we want.

Your patch reduces the number of rows to 1000 tuples.  I found it
possible to further reduce it to 100 tuples.  That also allowed me to
save the plan in the test case introduced by e1b7fde418.

Please check if you're OK with the patch attached.

I looked through the v2 patch and have two comments.

* The test case under "Check we don't pick aggregate path key instead of
grouping path key" does not have EXPLAIN to show the plan.  So how can
we ensure we do not mistakenly select the aggregate pathkeys instead of
the grouping pathkeys?

I confirm it works correctly. I am not sure about the stability of the
number of zeros in the output across platforms here:
avg
--------------------
4.0000000000000000
5.0000000000000000
That is why I used the format() function before. So, should we elaborate
on the test and restrict the output?

* I don't think the test case introduced by e1b7fde418 is still needed,
because we already have one under "Utilize the ordering of merge join to
avoid a full Sort operation".  This kind of test case is just to ensure
that we are able to utilize the ordering of the subplans underneath.  So
it should be parallel to the test cases for utilizing the ordering of
index scans and subquery scans.

I confirm that this version also checks ec_sortref initialization in the
case where equivalence classes are constructed from the WHERE clause.
Generally, I prefer one test per issue over one test for everything at
once, but it works and I don't have any reason to dispute it.

Also, I'm unsure about removing the disabling of the
max_parallel_workers_per_gather parameter. Have you verified that the
current plan dominates the partial one? Might cost fluctuations across
platforms trigger a parallel plan?

What's more, I suggest addressing here the complaint from [1]. As I see,
the cost difference between the Sort and IncrementalSort strategies in
that case is around 0.5. To make the test more stable I propose to change
it a bit and add a limit:
SELECT count(*) FROM btg GROUP BY z, y, w, x LIMIT 10;
That makes the efficacy of IncrementalSort more obvious, with a cost
difference of around 10 points.

[1]: /messages/by-id/CACG=ezaYM1tr6Lmp8PRH1aeZq=rBKXEoTwgzMcLaD5MPhfW0Lg@mail.gmail.com

--
regards,
Andrei Lepikhov
Postgres Professional

#143Richard Guo
guofenglinux@gmail.com
In reply to: Andrei Lepikhov (#142)
Re: POC: GROUP BY optimization

On Thu, Feb 22, 2024 at 12:18 PM Andrei Lepikhov <a.lepikhov@postgrespro.ru>
wrote:

On 22/2/2024 09:09, Richard Guo wrote:

I looked through the v2 patch and have two comments.

* The test case under "Check we don't pick aggregate path key instead of
grouping path key" does not have EXPLAIN to show the plan. So how can
we ensure we do not mistakenly select the aggregate pathkeys instead of
the grouping pathkeys?

I confirm it works correctly. I am not sure about the stability of the
number of zeros in the output across platforms here:
avg
--------------------
4.0000000000000000
5.0000000000000000
That is why I used the format() function before. So, should we elaborate
on the test and restrict the output?

The avg() function on integer argument is commonly used in
aggregates.sql. I don't think this is an issue. See the first test
query in aggregates.sql.

SELECT avg(four) AS avg_1 FROM onek;
avg_1
--------------------
1.5000000000000000
(1 row)

* I don't think the test case introduced by e1b7fde418 is still needed,
because we already have one under "Utilize the ordering of merge join to
avoid a full Sort operation". This kind of test case is just to ensure
that we are able to utilize the ordering of the subplans underneath. So
it should be parallel to the test cases for utilizing the ordering of
index scans and subquery scans.

Also, I'm unsure about removing the disabling of the
max_parallel_workers_per_gather parameter. Have you discovered the
domination of the current plan over the partial one? Do the cost
fluctuations across platforms not trigger a parallel plan?

The table used for testing contains only 100 tuples, which is the size
of only one page. I don't believe it would trigger any parallel plans,
unless we manually change min_parallel_table_scan_size.

What's more, I suggest addressing here the complaint from [1]. As I see,
the cost difference between the Sort and IncrementalSort strategies in
that case is around 0.5. To make the test more stable I propose to change
it a bit and add a limit:
SELECT count(*) FROM btg GROUP BY z, y, w, x LIMIT 10;
That makes the efficacy of IncrementalSort more obvious, with a cost
difference of around 10 points.

I don't think that's necessary. With Incremental Sort the final cost
is:

GroupAggregate (cost=1.66..19.00 rows=100 width=25)

while with full Sort it is:

GroupAggregate (cost=16.96..19.46 rows=100 width=25)

With the STD_FUZZ_FACTOR (1.01), there is no doubt that the first path
is cheaper on total cost. Not to say that even if somehow we decide the
two paths are fuzzily the same on total cost, the first path still
dominates because its startup cost is much cheaper.
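The fuzzy comparison referred to here can be sketched in a few lines. This is only an illustration of the idea, not the planner's actual compare_path_costs_fuzzily() logic (which handles more cases); the costs are the ones quoted above:

```python
# Illustrative only: PostgreSQL's STD_FUZZ_FACTOR is 1.01, meaning costs
# within 1% of each other are treated as fuzzily equal.
STD_FUZZ_FACTOR = 1.01

def fuzzily_cheaper(a, b, fuzz=STD_FUZZ_FACTOR):
    """Compare two (startup_cost, total_cost) pairs roughly the way the
    planner does: total cost first, startup cost as the tie-breaker."""
    a_startup, a_total = a
    b_startup, b_total = b
    if a_total * fuzz < b_total:
        return 'a'
    if b_total * fuzz < a_total:
        return 'b'
    # Totals are fuzzily equal; fall back to startup cost.
    if a_startup * fuzz < b_startup:
        return 'a'
    if b_startup * fuzz < a_startup:
        return 'b'
    return 'tie'

incremental_sort = (1.66, 19.00)   # GroupAggregate atop Incremental Sort
full_sort = (16.96, 19.46)         # GroupAggregate atop full Sort
print(fuzzily_cheaper(incremental_sort, full_sort))   # 'a'
```

Even if the totals were within 1% of each other, the startup comparison would still pick the incremental-sort path, which is the point made above.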

Thanks
Richard

#144Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Richard Guo (#143)
Re: POC: GROUP BY optimization

On 22/2/2024 13:35, Richard Guo wrote:

The avg() function on integer argument is commonly used in
aggregates.sql.  I don't think this is an issue.  See the first test
query in aggregates.sql.

Make sense

it should be parallel to the test cases for utilize the ordering of
index scan and subquery scan.

Also, I'm unsure about removing the disabling of the
max_parallel_workers_per_gather parameter. Have you discovered the
domination of the current plan over the partial one? Do the cost
fluctuations across platforms not trigger a parallel plan?

The table used for testing contains only 100 tuples, which is the size
of only one page.  I don't believe it would trigger any parallel plans,
unless we manually change min_parallel_table_scan_size.

I don't intend to argue it, but just for information: I frequently
reduce it to zero, allowing PostgreSQL to make the decision based on
costs. That sometimes works much better, because one small table in a
multi-way join can rule out an effective parallel plan.

What's more, I suggest addressing here the complaint from [1]. As I
see, the cost difference between the Sort and IncrementalSort
strategies in that case is around 0.5. To make the test more stable I
propose to change it a bit and add a limit:
SELECT count(*) FROM btg GROUP BY z, y, w, x LIMIT 10;
That makes the efficacy of IncrementalSort more obvious, with a cost
difference of around 10 points.

I don't think that's necessary.  With Incremental Sort the final cost
is:

    GroupAggregate  (cost=1.66..19.00 rows=100 width=25)

while with full Sort it is:

    GroupAggregate  (cost=16.96..19.46 rows=100 width=25)

With the STD_FUZZ_FACTOR (1.01), there is no doubt that the first path
is cheaper on total cost.  Not to say that even if somehow we decide the
two paths are fuzzily the same on total cost, the first path still
dominates because its startup cost is much cheaper.

As before, I won't protest here; it needs some calculation of how much
cost can be added by bulk extension of the relation blocks. If Maxim
answers that this is enough to resolve his issue, why not?

--
regards,
Andrei Lepikhov
Postgres Professional

#145Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andrei Lepikhov (#144)
Re: POC: GROUP BY optimization

I'm late to the party, and I apologize for not having paid attention
to this thread for months ... but I am wondering why 0452b461b got
committed. It seems to add a substantial amount of complexity and
planner cycles in return for not much. The reason why I'm dubious
about it can be found in this comment that the patch deleted:

- * In principle it might be interesting to consider other orderings of the
- * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost. We don't
- * bother with that, though. Hashed grouping will frequently win anyway.

Indeed, if you remove the

SET enable_hashagg = off;
SET enable_seqscan = off;

lines added to aggregates.sql, you find that every single one of the
test cases added there changes plan, except for the one case that is
there to prove that the planner *doesn't* pick this type of plan.
That shows that the planner doesn't believe any of the plans developed
here are cheaper than other alternatives it would normally consider
(usually HashAgg). So it sure seems like we're spending a lot of
effort on uninteresting plans. Maybe this is an artifact of testing
against toy tables and a useful win could be shown on bigger tables,
but I don't see any indication in the thread that anyone actually
demonstrated that.

I might hold still for this patch anyway if the patch were simpler
and more obviously correct, but there are scary bits in it:

* The very first hunk causes get_eclass_for_sort_expr to have
side-effects on existing EquivalenceClass data structures.
What is the argument that that's not going to cause problems?
At the very least it introduces asymmetry, in that the first
caller wins the chance to change the EC's ec_sortref and later
callers won't be able to.
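To make the asymmetry concrete, here is a toy model of the questioned behaviour. The names mirror the planner's, but this is not the real code:

```python
class EquivalenceClass:
    """Toy stand-in for the planner's EquivalenceClass node."""
    def __init__(self):
        self.ec_sortref = 0   # 0 means "not associated with a sort clause"

def get_eclass_for_sort_expr(ec, sortref):
    # The questioned hunk: fill in ec_sortref as a side effect,
    # but only if no earlier caller has already set it.
    if ec.ec_sortref == 0:
        ec.ec_sortref = sortref
    return ec

ec = EquivalenceClass()
get_eclass_for_sort_expr(ec, 3)   # first caller wins the field...
get_eclass_for_sort_expr(ec, 7)   # ...later callers silently lose
print(ec.ec_sortref)   # 3
```

Whichever clause happens to be processed first permanently fixes the association, regardless of which sort/group clause actually needs it later.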

* I'm pretty unconvinced by group_keys_reorder_by_pathkeys (which
I notice has already had one band-aid added to it since commit).
In particular, it seems to believe that the pathkeys and clauses
lists match one-for-one, but I seriously doubt that that invariant
remains guaranteed after the cleanup steps

/* append the remaining group pathkeys (will be treated as not sorted) */
*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
*group_pathkeys);
*group_clauses = list_concat_unique_ptr(new_group_clauses,
*group_clauses);

For that to be reliable, the SortGroupClauses added to
new_group_clauses in the main loop have to be exactly those
that are associated with the same pathkeys in the old lists.
I doubt that that's necessarily true in the presence of redundant
grouping clauses. (Maybe we can't get here with any redundant
grouping clauses, but still, we don't really guarantee uniqueness of
SortGroupClauses, much less that they are never copied which is what
you need if you want to believe that pointer equality is sufficient
for de-duping here. PathKeys are explicitly made to be safe to compare
pointer-wise, but I know of no such guarantee for SortGroupClauses.)
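The pointer-equality hazard is easy to demonstrate outside the planner. Below is a toy stand-in (not PostgreSQL's actual list code) showing that an identity-based uniqueness check cannot de-duplicate a value-equal copy of a SortGroupClause:

```python
from dataclasses import dataclass

@dataclass
class SortGroupClause:
    tleSortGroupRef: int
    sortop: int

def concat_unique_ptr(a, b):
    """Append items of b not already present in a *by identity*
    (pointer equality), mimicking list_concat_unique_ptr."""
    out = list(a)
    for item in b:
        if not any(item is x for x in out):
            out.append(item)
    return out

original = SortGroupClause(tleSortGroupRef=1, sortop=97)
copy = SortGroupClause(tleSortGroupRef=1, sortop=97)  # equal value, distinct object

merged = concat_unique_ptr([original], [copy])
print(len(merged))   # 2 -- the copy slips past the identity check
```

PathKeys are canonicalized precisely so that pointer comparison is safe for them; nothing comparable is guaranteed for SortGroupClauses, which is exactly the concern raised above.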

* It seems like root->processed_groupClause no longer has much to do
with the grouping behavior, which is scary given how much code still
believes that it does. I suspect there are bugs lurking there, and
am not comforted by the fact that the patch changed exactly nothing
in the pathnodes.h documentation of that field. This comment looks
pretty silly now too:

/* Preprocess regular GROUP BY clause, if any */
root->processed_groupClause = list_copy(parse->groupClause);

What "preprocessing" is going on there? This comment was adequate
before, when the code line invoked preprocess_groupclause which had
a bunch of commentary; but now it has to stand on its own and it's
not doing a great job of that.

* Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
type name, and the comments provided for it are not going to educate
anybody. What is the "association" between the pathkeys and clauses?
I guess the clauses are supposed to be SortGroupClauses, but not all
pathkeys match up to SortGroupClauses, so what then? This is very
underspecified, and fuzzy thinking about data structures tends to lead
to bugs.

So I'm quite afraid that there are still bugs lurking here.
What's more, given that the plans this patch makes apparently
seldom win when you don't put a thumb on the scales, it might
take a very long time to isolate those bugs. If the patch
produces a faulty plan (e.g. not sorted properly) we'd never
notice if that plan isn't chosen, and even if it is chosen
it probably wouldn't produce anything as obviously wrong as
a crash.

If this patch were producing better results I'd be more excited
about putting more work into it. But on the basis of what I'm
seeing right now, I think maybe we ought to give up on it.

regards, tom lane

#146Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

If this patch were producing better results I'd be more excited
about putting more work into it. But on the basis of what I'm
seeing right now, I think maybe we ought to give up on it.

First, thanks for the deep review - sometimes, only a commit gives us a
chance to get such observation :))).
On a broader note, introducing automatic GROUP BY order selection is a
step towards teaching the optimiser to handle poorly tuned incoming
queries. While it's true that this may initially cost some planning
time, it's crucial to weigh the potential benefits. So, beforehand, we
should agree: is it worth it?
If yes, I can say that I often see hashing fail to apply in grouping:
sometimes because of estimation errors, sometimes because the grouping
input is already sorted, sometimes in analytical queries where the
planner doesn't have enough memory for hashing. In analytical cases, the
only way to speed up such queries is sometimes to be smart with features
like IncrementalSort and this one.
About low efficiency: remember, on the previous version of the GROUP-BY
optimisation we disagreed on operator costs and the cost model in
general. In the current version, we went the opposite way - adding small
features step-by-step. The current commit contains an integral part of
the feature and is designed for safely testing the approach and then
adding more profitable parts, like choosing the group-by order according
to distinct values or unique indexes on grouping columns.
I have gone through the code, steered by the issues you explained in
detail. I see seven issues; two of them definitely should be scrutinised
right now, and I'm ready to do that.

--
regards,
Andrei Lepikhov
Postgres Professional

#147Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
1 attachment(s)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

* The very first hunk causes get_eclass_for_sort_expr to have
side-effects on existing EquivalenceClass data structures.
What is the argument that that's not going to cause problems?
At the very least it introduces asymmetry, in that the first
caller wins the chance to change the EC's ec_sortref and later
callers won't be able to.

Let me resolve issues bit by bit.
Addressing your first note about 'asymmetry', I examined this part of
the code. Before commit 8d83a5d, it could cause some artefacts, but
since then, a kind of asymmetry has been introduced into the GROUP-BY
clause list anyway. As you can see in the attached patch, GROUP-BY
reordering works even when the asymmetry introduced by 8d83a5d chooses
different keys.

At the same time, I agree that implicitly setting anything in an
exported function, which should just look up an EC, is a questionable
coding style. To avoid possible usage issues (in extensions, for
example), I propose setting it afterwards, explicitly requesting this
action via an input parameter - see my attempt in the attachment.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

group_by_asymmetry.diff (text/x-patch)
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index a619ff9177..bc08dfadaf 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec; /* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1d61881a6b..5d9597adcd 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,16 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ * *set_ec_sortref is true if the caller wants to set up the ec_sortref field
+ * in the pathkey's equivalence class, if it has not been initialized beforehand.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1413,13 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+			/*
+			 * Copy the sortref if it wasn't set yet.  That may happen if
+			 * the ec was constructed from a WHERE clause, i.e. it doesn't
+			 * have a target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5320da51a0..dee98c9a90 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3391,7 +3391,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		/*
 		 * With a plain GROUP BY list, we can remove any grouping items that
 		 * are proven redundant by EquivalenceClass processing.  For example,
-		 * we can remove y given "WHERE x = y GROUP BY x, y".  These aren't
+		 * we can remove y given "WHERE x = y GROUP BY x, y".  These aren't
 		 * especially common cases, but they're nearly free to detect.  Note
 		 * that we remove redundant items from processed_groupClause but not
 		 * the original parse->groupClause.
@@ -3403,7 +3403,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3453,7 +3454,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3479,7 +3481,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6027,7 +6030,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 39ba461548..fff4b69dfd 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -242,7 +242,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 50695d83a2..1b06f9ca97 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2886,6 +2886,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in subtree, can be used to optimize GROUP-BY clause ordering.
+-- Also, here we check that it doesn't depend on the initial clause order in the
+-- GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 2905848ead..0be9c48483 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1253,6 +1253,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in subtree, can be used to optimize GROUP-BY clause ordering.
+-- Also, here we check that it doesn't depend on the initial clause order in the
+-- GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
#148Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
1 attachment(s)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

* I'm pretty unconvinced by group_keys_reorder_by_pathkeys (which
I notice has already had one band-aid added to it since commit).
In particular, it seems to believe that the pathkeys and clauses
lists match one-for-one, but I seriously doubt that that invariant
remains guaranteed after the cleanup steps

/* append the remaining group pathkeys (will be treated as not sorted) */
*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
*group_pathkeys);
*group_clauses = list_concat_unique_ptr(new_group_clauses,
*group_clauses);

For that to be reliable, the SortGroupClauses added to
new_group_clauses in the main loop have to be exactly those
that are associated with the same pathkeys in the old lists.
I doubt that that's necessarily true in the presence of redundant
grouping clauses. (Maybe we can't get here with any redundant
grouping clauses, but still, we don't really guarantee uniqueness of
SortGroupClauses, much less that they are never copied which is what
you need if you want to believe that pointer equality is sufficient
for de-duping here. PathKeys are explicitly made to be safe to compare
pointer-wise, but I know of no such guarantee for SortGroupClauses.)

I spent a lot of time inventing situations with SortGroupClause
duplicates. Unfortunately, it looks impossible to produce them so far.
But because we really don't guarantee uniqueness, I changed the code to
survive this case. Also, I added assertion checks to be sure we don't
have logical mistakes here - see the attachment.
About the band-aid mentioned above - as I see it, 4169850 introduces the
same trick in planner.c, so it looks like a result of the design of the
current code.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

get_useful_group_keys_orderings.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5d9597adcd..aa80f6486c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,15 +380,18 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 
 	/*
 	 * We're going to search within just the first num_groupby_pathkeys of
-	 * *group_pathkeys.  The thing is that root->group_pathkeys is passed as
+	 * *group_pathkeys. The thing is that root->group_pathkeys is passed as
 	 * *group_pathkeys containing grouping pathkeys altogether with aggregate
-	 * pathkeys.  If we process aggregate pathkeys we could get an invalid
+	 * pathkeys. If we process aggregate pathkeys we could get an invalid
 	 * result of get_sortgroupref_clause_noerr(), because their
-	 * pathkey->pk_eclass->ec_sortref doesn't referece query targetlist.  So,
+	 * pathkey->pk_eclass->ec_sortref doesn't reference query targetlist. So,
 	 * we allocate a separate list of pathkeys for lookups.
 	 */
 	grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
 
+	/* Make a new copy before reordering clauses */
+	*group_clauses = list_copy(*group_clauses);
+
 	/*
 	 * Walk the pathkeys (determining ordering of the input path) and see if
 	 * there's a matching GROUP BY key. If we find one, we append it to the
@@ -400,8 +403,8 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	 */
 	foreach(lc, pathkeys)
 	{
-		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -431,17 +434,25 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		Assert(OidIsValid(sgc->sortop));
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		/*
+		 * Keeping in mind that the SortGroupClause list doesn't guarantee
+		 * unique pointers we must explicitly transfer elements one-by-one.
+		 */
 		new_group_clauses = lappend(new_group_clauses, sgc);
+		*group_clauses = list_delete_ptr(*group_clauses, sgc);
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
-	*group_clauses = list_concat_unique_ptr(new_group_clauses,
-											*group_clauses);
+	*group_clauses = list_concat(new_group_clauses, *group_clauses);
 
 	list_free(grouping_pathkeys);
 	return n;
@@ -484,9 +495,9 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
-	Query	   *parse = root->parse;
-	List	   *infos = NIL;
-	PathKeyInfo *info;
+	Query		   *parse = root->parse;
+	List		   *infos = NIL;
+	PathKeyInfo	   *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
@@ -561,6 +572,23 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo	   *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell	   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+		}
+	}
+#endif
 	return infos;
 }
 
#149Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
1 attachment(s)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

* It seems like root->processed_groupClause no longer has much to do
with the grouping behavior, which is scary given how much code still
believes that it does. I suspect there are bugs lurking there, and

1. Analysing the code, processed_groupClause is used in:
- adjust_foreign_grouping_path_cost - to decide whether we need to add
the cost of a sort to the foreign grouping;
- just for replacing relids during the SJE process;
- create_groupingsets_plan(), preprocess_grouping_sets,
remove_useless_groupby_columns - we don't apply this feature in the case
of grouping sets;
- get_number_of_groups - the estimation shouldn't depend on the order of
clauses;
- create_grouping_paths - to analyse hashability of the grouping clause;
- make_group_input_target, make_partial_grouping_target - doesn't depend
on the order of clauses;
- planner.c: add_paths_to_grouping_rel, create_partial_grouping_paths -
only in the case of hash grouping.

So, the only place where we can impact current behavior is postgres_fdw.
But there we don't really know which plan will be chosen during planning
on the foreign node, so keeping the same behavior seems legal to me.

am not comforted by the fact that the patch changed exactly nothing
in the pathnodes.h documentation of that field. This comment looks
pretty silly now too:

/* Preprocess regular GROUP BY clause, if any */
root->processed_groupClause = list_copy(parse->groupClause);

What "preprocessing" is going on there? This comment was adequate
before, when the code line invoked preprocess_groupclause which had
a bunch of commentary; but now it has to stand on its own and it's
not doing a great job of that.

Thanks for pointing out this place. I fixed it.
More interesting here is that we move decisions on the order of group-by
clauses to the stage where we already have the underlying subtree and
incoming pathkeys. In principle, we could bring back the preprocessing
routine and arrange GROUP-BY with the ORDER-BY clause as it was before.
But looking into the code, I found only one reason to do this:
adjust_group_pathkeys_for_groupagg. Curious about how much profit we get
here, I think we can investigate it later with no hurry. A good outcome
here would be a test case that shows the importance of arranging
GROUP-BY and ORDER-BY at an early stage.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

minor_comment.patch (text/x-patch)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5ea3705796..861656a192 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1424,7 +1424,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		}
 		else if (parse->groupClause)
 		{
-			/* Preprocess regular GROUP BY clause, if any */
+			/*
+			 * Make a copy of origin groupClause because we are going to
+			 * Make a copy of the original groupClause because we are going to
+			 */
 			root->processed_groupClause = list_copy(parse->groupClause);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
#150Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
1 attachment(s)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

* Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
type name, and the comments provided for it are not going to educate
anybody. What is the "association" between the pathkeys and clauses?
I guess the clauses are supposed to be SortGroupClauses, but not all
pathkeys match up to SortGroupClauses, so what then? This is very
underspecified, and fuzzy thinking about data structures tends to lead
to bugs.

I'm not the best in English documentation and naming style. So, feel
free to check my version.

So I'm quite afraid that there are still bugs lurking here.
What's more, given that the plans this patch makes apparently
seldom win when you don't put a thumb on the scales, it might
take a very long time to isolate those bugs. If the patch
produces a faulty plan (e.g. not sorted properly) we'd never
notice if that plan isn't chosen, and even if it is chosen
it probably wouldn't produce anything as obviously wrong as
a crash.

I added checks on the proper order of pathkeys and clauses.
If you really care about that, we should spend additional time and
rewrite the code to generate the order of clauses somewhere in the plan
creation phase. For example, during the create_group_plan call, we could
use the processed_groupClause list, cycle through subpath->pathkeys, set
the order, and detect whether the pathkeys list corresponds to the
group-by or is enough to build a grouping plan.
Anyway, making this part of the code more resistant to blunders is
another story.
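As a rough sketch (a Python pseudomodel, not the actual assertion code; the dicts stand in for SortGroupClause and PathKey nodes), the consistency rule those added checks enforce is that the N-th clause and the N-th pathkey refer to the same sort group reference:

```python
# Model of the invariant asserted for every GroupByOrdering: the N-th
# clause's tleSortGroupRef must equal the ec_sortref of the N-th
# pathkey's equivalence class, and the lists must have equal length.

def ordering_is_consistent(clauses, pathkeys):
    if len(clauses) != len(pathkeys):
        return False
    return all(c["tleSortGroupRef"] == pk["ec_sortref"]
               for c, pk in zip(clauses, pathkeys))
```

Any reordering step that moves a clause without moving its paired pathkey (or vice versa) breaks this predicate, which is exactly the kind of blunder the runtime check is meant to catch.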

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

final_improvements.patch (text/x-patch)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index aa80f6486c..9c079270ec 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -461,7 +461,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -470,7 +470,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -482,7 +482,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -495,15 +495,15 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
-	Query		   *parse = root->parse;
-	List		   *infos = NIL;
-	PathKeyInfo	   *info;
+	Query			   *parse = root->parse;
+	List			   *infos = NIL;
+	GroupByOrdering	   *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -539,7 +539,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -564,7 +564,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -574,18 +574,29 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo	   *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering	   *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell	   *lc;
 
 		/* Test consistency of info structures */
 		for_each_from(lc, infos, 1)
 		{
-			info = lfirst_node(PathKeyInfo, lc);
+			ListCell *lc1, *lc2;
+
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
 			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
 			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey *pk = lfirst_node(PathKey, lc2);
+
+				if (pk->pk_eclass->ec_sortref != sgc->tleSortGroupRef)
+					elog(ERROR, "Order of group-by clauses doesn't correspond to incoming sort order");
+			}
 		}
 	}
 #endif
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 861656a192..4751a700ab 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6882,7 +6882,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6963,7 +6963,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7214,7 +7214,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7270,7 +7270,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 6c71098f2d..b3862500b6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -427,6 +427,10 @@ struct PlannerInfo
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
 	 * nonempty processed_groupClause.
+	 * Optimiser chooses specific order of group-by clauses during the upper
+	 * paths generation process, attempting to use different strategies to
+	 * minimize number of sorts or engage incremental sort.
+	 * See get_useful_group_keys_orderings for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
@@ -1468,14 +1472,20 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and corresponding list of pathkeys.
+ *
+ * Order of SortGroupClause elements must correspond to the order in the head of
+ * PathKey list:
+ * tleSortGroupRef of N-th element in the clauses must be the same as the value
+ * of ec_sortref in N-th pathkey equivalence class.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d551ada325..33eff1e63a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4113,7 +4113,7 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
-PathKeyInfo
+GroupByOrdering
 TidStore
 TidStoreIter
 TidStoreIterResult
#151Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#150)
Re: POC: GROUP BY optimization

Hi, Andrei!

On Thu, Apr 18, 2024 at 11:54 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 4/12/24 06:44, Tom Lane wrote:

* Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
type name, and the comments provided for it are not going to educate
anybody. What is the "association" between the pathkeys and clauses?
I guess the clauses are supposed to be SortGroupClauses, but not all
pathkeys match up to SortGroupClauses, so what then? This is very
underspecified, and fuzzy thinking about data structures tends to lead
to bugs.

I'm not the best in English documentation and naming style. So, feel
free to check my version.

So I'm quite afraid that there are still bugs lurking here.
What's more, given that the plans this patch makes apparently
seldom win when you don't put a thumb on the scales, it might
take a very long time to isolate those bugs. If the patch
produces a faulty plan (e.g. not sorted properly) we'd never
notice if that plan isn't chosen, and even if it is chosen
it probably wouldn't produce anything as obviously wrong as
a crash.

I added checkings on the proper order of pathkeys and clauses.
If you really care about that, we should spend additional time and
rewrite the code to generate an order of clauses somewhere in the plan
creation phase. For example, during the create_group_plan call, we could
use the processed_groupClause list, cycle through subpath->pathkeys, set
the order, and detect whether the pathkeys list corresponds to the
group-by or is enough to build a grouping plan.
Anyway, make this part of code more resistant to blunders is another story.

Thank you for the fixes you've proposed. I didn't look much into the
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all. I think at this stage the
most important thing is to come up with convincing examples showing
what huge performance benefits it could bring. I will return to this
later today and will try to provide some convincing examples.

Links
1. /messages/by-id/266850.1712879082@sss.pgh.pa.us

------
Regards,
Alexander Korotkov

#152jian he
jian.universality@gmail.com
In reply to: Alexander Korotkov (#151)
Re: POC: GROUP BY optimization

On Thu, Apr 18, 2024 at 6:58 PM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for the fixes you've proposed. I didn't look much into the
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all. I think at this stage the
most important thing is to come up with convincing examples showing
what huge performance benefits it could bring. I will return to this
later today and will try to provide some convincing examples.

hi.
I found a case where it improved performance.

+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+ i % 100 AS x,
+ i % 100 AS y,
+ 'abc' || i % 10 AS z,
+ i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
I changed
+FROM generate_series(1,10000) AS i;
to
+FROM generate_series(1, 1e6) AS i;

Then I found that the performance of these two queries improved a lot:
A: explain(analyze) SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER
BY y, x \watch i=0.1 c=10
B: explain(analyze) SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER
BY y, x, z, w \watch i=0.1 c=10

After setting (enable_seqscan, enable_hashagg) from on to off:
query A execution time went from 1533.013 ms to 533.430 ms
query B execution time went from 1996.817 ms to 497.020 ms

#153Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Tom Lane (#145)
1 attachment(s)
Re: POC: GROUP BY optimization

On 4/12/24 06:44, Tom Lane wrote:

If this patch were producing better results I'd be more excited
about putting more work into it. But on the basis of what I'm
seeing right now, I think maybe we ought to give up on it.

Let me show current cases where users profit from this tiny
improvement (see queries and execution results in query.sql):
1. 'Not optimised query text' - when group-by ordering wasn't considered
during database development.
2. 'Accidental pathkeys' - we didn't request any explicit orderings, but
the planner accidentally used a merge join that produced an ordering we
can utilise.
3. 'Uncertain scan path' - we have options regarding which index to use,
and because of that, we can't predict the optimal group-by ordering
before the start of query planning.
4. 'HashAgg vs. GroupAgg' - sometimes, the GroupAgg strategy outperforms
HashAgg just because we don't need any ordering step at all.

And the last thing here: this code introduces the basics needed to add
more sophisticated strategies, like ordering according to uniqueness or
preferring to place constant-width columns first in the grouping.

--
regards,
Andrei Lepikhov
Postgres Professional

Attachments:

query.sql (application/sql)
#154jian he
jian.universality@gmail.com
In reply to: jian he (#152)
1 attachment(s)
Re: POC: GROUP BY optimization

On Fri, Apr 19, 2024 at 6:44 PM jian he <jian.universality@gmail.com> wrote:

On Thu, Apr 18, 2024 at 6:58 PM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for the fixes you've proposed. I didn't look much into
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all. I think at this stage the
most important thing is to come up with convincing examples showing
how huge performance benefits it could cause. I will return to this
later today and will try to provide some convincing examples.

hi.
Previously, preprocess_groupclause would not process cases
where no ORDER BY clause is specified.
Commit 0452b461b will reorder the GROUP BY elements even when no
ORDER BY clause is specified, if there are associated indexes on them.
(I hope I understand it correctly.)

for example (when enable_hashagg is false)
explain(verbose) select count(*) FROM btg GROUP BY y,x;
pg16 will not reorder; it stays as written: `GROUP BY y,x`.

After commit 0452b461b, it will reorder to `GROUP BY x,y`,
because there is an index `btree (x, y)` (the only one) associated with it.
If you drop the index `btree (x, y)`, it will be `GROUP BY y,x` as in pg16.

Reordering GROUP BY elements when no ORDER BY clause is specified
is particularly useful for performance when work_mem is small.
I've attached some tests comparing master with REL_16_STABLE to
demonstrate that.
all the tests attached are under the condition:
work_mem='64kB', buildtype=release, max_parallel_workers_per_gather=0.

one example:
CREATE TABLE btg5 AS
SELECT i::numeric % 10 AS x, i % 10 AS y, 'abc' || i % 10 AS z, i % 100000 AS w
FROM generate_series(1, 1e6) AS i;
CREATE INDEX btg5_x_y_idx ON btg5(x, y);

explain(analyze) SELECT count(*) FROM btg5 GROUP BY z, y, w, x;
in pg17, the execution time is: 746.574 ms
in pg16, the execution time is: 1693.483 ms

if I reorder it manually as:
`explain(analyze) SELECT count(*) FROM btg5 GROUP BY x, y, w, z;`
then in pg16, the execution time is 630.394 ms

Attachments:

low_mem_groupby_reorder.sql (application/sql)
#155Alexander Korotkov
aekorotkov@gmail.com
In reply to: Alexander Korotkov (#151)
Re: POC: GROUP BY optimization

Hi!

On Thu, Apr 18, 2024 at 1:57 PM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for the fixes you've proposed. I didn't look much into
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all. I think at this stage the
most important thing is to come up with convincing examples showing
how huge performance benefits it could cause. I will return to this
later today and will try to provide some convincing examples.

I took a fresh look at 0452b461b, and have the following thoughts:
1) Previously, preprocess_groupclause() tries to reorder GROUP BY
clauses to match the required ORDER BY order. It only reorders if
GROUP BY pathkeys are the prefix of ORDER BY pathkeys or vice versa.
So, both of them need to be satisfied by one ordering. 0452b461b also
tries to match GROUP BY clauses to ORDER BY clauses, but takes into
account an incremental sort. Actually, instead of removing
preprocess_groupclause(), we could just teach it to take incremental
sort into account.
2) The get_useful_group_keys_orderings() function takes into account 3
orderings of pathkeys and clauses: original order as written in GROUP
BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible. Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY
clauses, so we don't need to distinguish the first two. We could just
consider output of new preprocess_groupclause() taking into account an
incremental sort and the ordering matching the input path as much as
possible. This seems like significant simplification.
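The prefix-matching idea described above can be sketched in a few lines of Python. This is an illustrative model only (names and logic are hypothetical, loosely inspired by group_keys_reorder_by_pathkeys; it is not the planner's actual code): given the sort order an input path already provides, move the matching GROUP BY keys to the front and count how many presorted keys an Incremental Sort could reuse.

```python
def reorder_group_keys(input_order, group_keys):
    """Move GROUP BY keys matching a prefix of the input path's sort
    order to the front; return the new key order and the number of
    presorted keys an Incremental Sort could reuse."""
    presorted = []
    for key in input_order:
        if key in group_keys:
            presorted.append(key)
        else:
            break  # prefix ends at the first non-grouping key
    rest = [k for k in group_keys if k not in presorted]
    return presorted + rest, len(presorted)

# An index on (x, y) feeding GROUP BY z, y, w, x:
order, n_presorted = reorder_group_keys(["x", "y"], ["z", "y", "w", "x"])
print(order, n_presorted)  # ['x', 'y', 'z', 'w'] 2
```

With two presorted keys the grouping only needs an Incremental Sort on the remaining keys instead of a full Sort, which is the source of the speedups discussed in this thread.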

Let me also describe my thoughts about the justification of the
feature itself. As Tom pointed upthread, Sort + Grouping is generally
unlikely faster than Hash Aggregate. The main point of this feature
is being able to match the order of GROUP BY clauses to the order of
the input path. That input path could be Index Scan or Subquery.
Let's concentrate on Index Scan. Index Scan can give us the required
order, so we can do grouping without Sort or with significantly
cheaper Incremental Sort. That could be much faster than Hash
Aggregate. But if we scan the full table (or its significant
fraction), Index Scan is typically much more expensive than Sequential
Scan because of random page access. However, there are cases when
Index Scan could be cheaper.
1) If the heap row is wide and the index contains all the required
columns, Index Only Scan can be cheaper than Sequential Scan because
of lesser volume.
2) If the query predicate is selective enough and matches the index,
Index Scan might be significantly cheaper. One may say that if the
query predicate is selective enough then there are not that many
matching rows, so aggregating them either way isn't a big deal anyway.
However, given that data volumes are growing tremendously, it's not
hard to imagine that the index selected a small fraction of table
rows, but they are still enough to be costly for aggregating.
Therefore, I think there are significant cases where matching GROUP BY
clauses to the order of the input path could give a substantial
improvement over Hash Aggregate.

While there are some particular use-cases by Jian He, I hope that
above could give some rationale.

------
Regards,
Alexander Korotkov

#156jian he
jian.universality@gmail.com
In reply to: Alexander Korotkov (#155)
Re: POC: GROUP BY optimization

hi.
I found an interesting case.

CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY z,y,w,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY w,y,z,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,z,x,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,w,x,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,z,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,w,z;

these will use:
-> Incremental Sort
Sort Key: x, y, $, $
Presorted Key: x, y
-> Index Scan using t1_x_y_idx on t1

I guess this is fine, but not optimal?

#157jian he
jian.universality@gmail.com
In reply to: jian he (#156)
Re: POC: GROUP BY optimization

hi
one more question (maybe a dumb one....)

drop table if exists t1;
CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF, verbose) SELECT count(*) FROM t1 GROUP BY z,x,y,w
order by w;

QUERY PLAN
------------------------------------------------------
GroupAggregate
Output: count(*), w, z, x, y
Group Key: t1.w, t1.x, t1.y, t1.z
-> Sort
Output: w, z, x, y
Sort Key: t1.w, t1.x, t1.y, t1.z
-> Index Scan using t1_x_y_idx on public.t1
Output: w, z, x, y
(8 rows)

If the plan sorts by
` Sort Key: t1.w, t1.x, t1.y, t1.z`

then isn't the output supposed to be

Output: w, x, y, z

?

#158jian he
jian.universality@gmail.com
In reply to: jian he (#156)
Re: POC: GROUP BY optimization

On Wed, Apr 24, 2024 at 2:25 PM jian he <jian.universality@gmail.com> wrote:

hi.
I found an interesting case.

CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

We can make these cases also use `Presorted Key: x, y`.

in the
`if (path->pathkeys && !pathkeys_contained_in(path->pathkeys,
root->group_pathkeys))` branch
we can simply do
-                       infos = lappend(infos, info);
+                       infos = lcons(info, infos);

similar to what we did in plancat.c (search for lcons).

get_useful_group_keys_orderings returns a list of PathKeyInfo,
and the caller just iterates over each element,
so for the caller, the order of the list elements returned from
get_useful_group_keys_orderings
does not matter.

for path Incremental Sort:
function make_ordered_path will return the same cost for different
numbers of presorted keys.

for example:
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
make_ordered_path cost is same for:
`info->pathkeys: x,y,z,w`
`info->pathkeys:x,z,y,w`

If we arrange `info->pathkeys: x,y,z,w` before `info->pathkeys: x,z,y,w`
in get_useful_group_keys_orderings,
then with the same cost we will choose the first one
(`info->pathkeys: x,y,z,w`),
and if we use Incremental Sort, we will use `Presorted Key: x, y`.
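The effect of the proposed lappend-to-lcons change can be sketched in Python (an illustrative model only, not PostgreSQL's actual List API): lappend appends to the tail while lcons prepends to the head, so prepending puts the index-matching ordering in front of the original one.

```python
def lappend(lst, item):
    """Append to the tail, like PostgreSQL's lappend()."""
    return lst + [item]

def lcons(item, lst):
    """Prepend to the head, like PostgreSQL's lcons()."""
    return [item] + lst

infos = [["x", "z", "y", "w"]]   # original GROUP BY ordering, added first
better = ["x", "y", "z", "w"]    # ordering matching the (x, y) index

# lappend keeps the original ordering in front...
print(lappend(infos, better)[0])  # ['x', 'z', 'y', 'w']
# ...while lcons lets the index-matching ordering be considered first
print(lcons(better, infos)[0])    # ['x', 'y', 'z', 'w']
```

Since ties in cost are broken by whichever path was added first, the list position decided here determines which presorted-key count the final plan uses.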

#159Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: jian he (#156)
Re: POC: GROUP BY optimization

On 24.04.2024 13:25, jian he wrote:

hi.
I found an interesting case.

CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY z,y,w,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY w,y,z,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,z,x,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,w,x,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,z,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,w,z;

these will use:
-> Incremental Sort
Sort Key: x, y, $, $
Presorted Key: x, y
-> Index Scan using t1_x_y_idx on t1

I guess this is fine, but not optimal?

It looks like a bug right now - in the current implementation we don't
differentiate between different orders. So:
1. Applying all the patches from the thread which I proposed as an
answer to Tom Lane's last rebuke - is the behavior still the same?
2. Could you try to find the reason?

--
regards,
Andrei Lepikhov
Postgres Professional

#160jian he
jian.universality@gmail.com
In reply to: Andrei Lepikhov (#159)
Re: POC: GROUP BY optimization

On Thu, May 16, 2024 at 3:47 PM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 24.04.2024 13:25, jian he wrote:

hi.
I found an interesting case.

CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY z,y,w,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY w,y,z,x;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,z,x,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,w,x,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,z,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY y,x,w,z;

these will use:
-> Incremental Sort
Sort Key: x, y, $, $
Presorted Key: x, y
-> Index Scan using t1_x_y_idx on t1

I guess this is fine, but not optimal?

It looks like a bug right now - in the current implementation we don't
differentiate between different orders. So:
1. Applying all the patches from the thread which I proposed as an
answer to Tom Lane's last rebuke - is the behavior still the same?

I've applied these 4 patches in this thread.
final_improvements.patch
minor_comment.patch
get_useful_group_keys_orderings.patch
group_by_asymmetry.diff

The behavior is still the same as on master,
meaning that the below quoted queries still use "Presorted Key: x".

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

2. Could you try to find the reason?

The following is my understanding; it may be wrong...

The function get_useful_group_keys_orderings may generate alternative
pathkeys and group clauses.
For the different PathKeyInfos, the sub-functions within
add_paths_to_grouping_rel
(make_ordered_path, create_agg_path) will yield the same cost.
The function add_path (within add_paths_to_grouping_rel) will add these
paths (different PathKeyInfos) in *sequential* order.

Then, later in the function set_cheapest,
`foreach(p, parent_rel->pathlist)` will iterate over these different paths
in sequential order.
So in set_cheapest, if 2 path costs are the same, the first path
residing in parent_rel->pathlist wins.

For a concrete example:
CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::float4 AS w FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;
EXPLAIN (COSTS OFF, verbose) SELECT count(*) FROM t1 GROUP BY x,z,y,w;

add_paths_to_grouping_rel will do the following in sequential order:
A. generate and add the original PathKeyInfo (group by x,z,y,w)
B. generate and add the alternative PathKeyInfo (group by x,y,z,w),
because of the index path
C. can_hash is true, so generate a HashAgg path (because
enable_hashagg is set to off, this path's cost is very high)

As mentioned previously,
both A and B can use Incremental Sort; set_cheapest will choose A
instead of B,
because the path generated in step A was added **first** and already
satisfies incremental sort.
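The first-added-wins tie-breaking can be modeled in a few lines of Python. This is a deliberately simplified sketch (the real set_cheapest() also compares startup cost, pathkeys, and parameterization): only a strictly lower cost replaces the current pick, so with equal costs the path that entered the pathlist first is kept.

```python
def set_cheapest(pathlist):
    """Pick the cheapest path; on a tie, the earlier entry wins
    because only a strictly lower cost replaces the current pick."""
    cheapest = pathlist[0]
    for path in pathlist[1:]:
        if path["total_cost"] < cheapest["total_cost"]:
            cheapest = path
    return cheapest

path_a = {"presorted_keys": ("x",), "total_cost": 100.0}      # added first
path_b = {"presorted_keys": ("x", "y"), "total_cost": 100.0}  # same cost

# With equal costs, whichever path was added to the pathlist first wins:
print(set_cheapest([path_a, path_b])["presorted_keys"])  # ('x',)
print(set_cheapest([path_b, path_a])["presorted_keys"])  # ('x', 'y')
```

This is why the order in which get_useful_group_keys_orderings emits its orderings ends up deciding the plan whenever the cost model cannot distinguish them.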

#161Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: jian he (#160)
Re: POC: GROUP BY optimization

On 20/5/2024 15:54, jian he wrote:

As mentioned previously,
both A and B can use Incremental Sort, set_cheapest will choose the A
instead of B,
because A step generated path **first** satisfies increment sort.

Yeah, I agree with your analysis.
Looking into cost_incremental_sort, I see that we have ten
group_tuples. This value doesn't depend on how many presorted keys (1 or
2) we use; this is caused by the default estimation.
Given the current circumstances, it seems unlikely that we can make any
significant changes without introducing a new sort cost model that
accounts for the number of sorted columns.
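A rough Python sketch shows why the cost is insensitive to the presorted-key count once the group estimate is fixed. This captures only the shape of the computation (the real cost_incremental_sort has startup costs, comparison costs, and other details): each presorted group of roughly tuples/input_groups rows is sorted separately, so identical input_groups values give identical costs.

```python
import math

def incremental_sort_cost(tuples, input_groups):
    """Toy model: sort each presorted group of ~tuples/input_groups
    rows separately; more input groups means smaller per-group sorts."""
    group_tuples = max(tuples / input_groups, 2.0)
    return input_groups * group_tuples * math.log2(group_tuples)

# With estimate_num_groups() returning 10 for both "x" and "x, y",
# the two incremental-sort paths are costed identically:
print(incremental_sort_cost(100, 10) == incremental_sort_cost(100, 10))  # True

# A larger group estimate (e.g. 100 groups for "x, y") would make the
# more-presorted path visibly cheaper:
print(incremental_sort_cost(100, 100) < incremental_sort_cost(100, 10))  # True
```

So the indistinguishable costs observed in the thread trace back to the shared group estimate, not to the sort cost formula itself.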

--
regards,
Andrei Lepikhov
Postgres Professional

#162jian he
jian.universality@gmail.com
In reply to: jian he (#160)
Re: POC: GROUP BY optimization

On Mon, May 20, 2024 at 4:54 PM jian he <jian.universality@gmail.com> wrote:

The behavior is still the same as on master,
meaning that the below quoted queries still use "Presorted Key: x".

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,w,z,y;
the above part will use:
-> Incremental Sort
Sort Key: x, $, $, $
Presorted Key: x
-> Index Scan using t1_x_y_idx on t1

2. Could you try to find the reason?

The following is my understanding; it may be wrong...

I think
my previous message only explained that when two paths have the same
cost, the planner chooses the one that was added to the pathlist first,
but didn't explain why the Incremental Sort paths presorted by one key
and by two keys
yield the same cost.
--------------------------------------------
if (rel->tuples > 0)
{
/*
* Clamp to size of rel, or size of rel / 10 if multiple Vars. The
* fudge factor is because the Vars are probably correlated but we
* don't know by how much. We should never clamp to less than the
* largest ndistinct value for any of the Vars, though, since
* there will surely be at least that many groups.
*/
double clamp = rel->tuples;

if (relvarcount > 1)
{
clamp *= 0.1;
if (clamp < relmaxndistinct)
{
clamp = relmaxndistinct;
/* for sanity in case some ndistinct is too large: */
if (clamp > rel->tuples)
clamp = rel->tuples;
}
}
if (reldistinct > clamp)
reldistinct = clamp;
...
}

I think
the above code snippet [0] within the function estimate_num_groups
makes the Incremental Sort path not pick up the optimal presorted keys.

see the original problem thread: [1]
----------------------------------------------------------------------------------------
CREATE TABLE t1 AS
SELECT (i % 10)::numeric AS x,(i % 10)::int8 AS y,'abc' || i % 10 AS
z, i::int4 AS w
FROM generate_series(1, 100) AS i;
CREATE INDEX t1_x_y_idx ON t1 (x, y);
ANALYZE t1;
SET enable_hashagg = off;
SET enable_seqscan = off;

EXPLAIN (COSTS OFF) SELECT count(*) FROM t1 GROUP BY x,z,y,w;
will generate 2 Incremental Sort paths: one with "Presorted Key: x",
another with "Presorted Key: x,y".
The first Incremental Sort path is added first.
The value returned by estimate_num_groups (input_groups) is the main
input to the cost calculation of an Incremental Sort path!

But here, the estimate_num_groups function returned the same
value, 10, for both
"Presorted Key: x" and "Presorted Key: x,y".
Then cost_incremental_sort returns the same cost for "Presorted
Key: x" and "Presorted Key: x,y".

(Line numbers refer to src/backend/utils/adt/selfuncs.c.)
Why "Presorted Key: x,y" returns 10 in estimate_num_groups:
line 3667 assigns clamp to 100.0, because rel->tuples is 100.
line 3671 `clamp *= 0.1;` makes clamp become 10.
lines 3680, 3681: the `if (reldistinct > clamp)` branch reduces
reldistinct from 100 to 10.
line 3733 `numdistinct *= reldistinct;` makes numdistinct become 10.

If I change the total number of rows in the relation, or change the
number of distinct y values,
then the planner will use "Incremental Sort, Presorted Key: x,y".
For example:

CREATE TABLE t10 AS
SELECT (i % 10)::numeric AS x,(i % 11)::int8 AS y,'abc' || i % 10
AS z, i::float4 AS w FROM generate_series(1, 1E2) AS i;
CREATE INDEX t10_x_y_idx ON t10 (x, y);
ANALYZE t10;

The above setup will make the following query using "Incremental Sort
Presorted Key: x,y".
EXPLAIN (COSTS OFF) SELECT count(*) FROM t10 GROUP BY x,z,y,w;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t10 GROUP BY x,w,y,z;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t10 GROUP BY x,z,w,y;
EXPLAIN (COSTS OFF) SELECT count(*) FROM t10 GROUP BY x,w,z,y;

summary:
* the regression test setup at [2] can make some cases not pick up the
optimal
Incremental Sort path.

we use this test in src/test/regress/sql/aggregates.sql
-- Engage incremental sort
EXPLAIN (COSTS OFF)
SELECT count(*) FROM btg GROUP BY z, y, w, x;

but if we use:
EXPLAIN (COSTS OFF)
SELECT count(*) FROM btg GROUP BY x, z, w, y;
then the plan is not the best.

* The regression test setup makes the estimate_num_groups logic
return the same value,
therefore Incremental Sort presorted by one key and by two keys yield
the same cost:
cost_incremental_sort will return the same cost for both.

* In the regression test setup [2], maybe we can change:

CREATE TABLE btg AS SELECT
i % 10 AS x,
i % 10 AS y,
'abc' || i % 10 AS z,
i AS w
FROM generate_series(1, 100) AS i;

to

CREATE TABLE btg AS SELECT
i % 10 AS x,
i % 10 AS y,
'abc' || i % 10 AS z,
i AS w
FROM generate_series(1, 1000) AS i;

[0]: https://git.postgresql.org/cgit/postgresql.git/tree/src/backend/utils/adt/selfuncs.c#n3669
[1]: /messages/by-id/CACJufxGt99nZ+nir+aB6pFQ=K8oNiHAQ3OELqSbGMqNxok8nxA@mail.gmail.com
[2]: https://git.postgresql.org/cgit/postgresql.git/tree/src/test/regress/sql/aggregates.sql#n1194

#163Alexander Korotkov
aekorotkov@gmail.com
In reply to: Alexander Korotkov (#155)
4 attachment(s)
Re: POC: GROUP BY optimization

Hi!

On Tue, Apr 23, 2024 at 1:19 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

On Thu, Apr 18, 2024 at 1:57 PM Alexander Korotkov <aekorotkov@gmail.com> wrote:

Thank you for the fixes you've proposed. I didn't look much into
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all. I think at this stage the
most important thing is to come up with convincing examples showing
how huge performance benefits it could cause. I will return to this
later today and will try to provide some convincing examples.

I took a fresh look at 0452b461b, and have the following thoughts:
1) Previously, preprocess_groupclause() tries to reorder GROUP BY
clauses to match the required ORDER BY order. It only reorders if
GROUP BY pathkeys are the prefix of ORDER BY pathkeys or vice versa.
So, both of them need to be satisfied by one ordering. 0452b461b also
tries to match GROUP BY clauses to ORDER BY clauses, but takes into
account an incremental sort. Actually, instead of removing
preprocess_groupclause(), we could just teach it to take incremental
sort into account.
2) The get_useful_group_keys_orderings() function takes into account 3
orderings of pathkeys and clauses: original order as written in GROUP
BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible. Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY
clauses, so we don't need to distinguish the first two. We could just
consider output of new preprocess_groupclause() taking into account an
incremental sort and the ordering matching the input path as much as
possible. This seems like significant simplification.

Let me also describe my thoughts about the justification of the
feature itself. As Tom pointed upthread, Sort + Grouping is generally
unlikely faster than Hash Aggregate. The main point of this feature
is being able to match the order of GROUP BY clauses to the order of
the input path. That input path could be Index Scan or Subquery.
Let's concentrate on Index Scan. Index Scan can give us the required
order, so we can do grouping without Sort or with significantly
cheaper Incremental Sort. That could be much faster than Hash
Aggregate. But if we scan the full table (or its significant
fraction), Index Scan is typically much more expensive than Sequential
Scan because of random page access. However, there are cases when
Index Scan could be cheaper.
1) If the heap row is wide and the index contains all the required
columns, Index Only Scan can be cheaper than Sequential Scan because
of lesser volume.
2) If the query predicate is selective enough and matches the index,
Index Scan might be significantly cheaper. One may say that if the
query predicate is selective enough then there are not that many
matching rows, so aggregating them either way isn't a big deal anyway.
However, given that data volumes are growing tremendously, it's not
hard to imagine that the index selected a small fraction of table
rows, but they are still enough to be costly for aggregating.
Therefore, I think there are significant cases where matching GROUP BY
clauses to the order of the input path could give a substantial
improvement over Hash Aggregate.

While there are some particular use-cases by Jian He, I hope that
above could give some rationale.

I've assembled patches in this thread into one patchset.
0001 The patch fixing asymmetry in setting EquivalenceClass.ec_sortref
by Andrei [1]. I've revised comments and wrote the commit message.
0002 The patch for handling duplicates of SortGroupClause. I didn't
get the sense of Andrei's implementation. It seems to care about
duplicate pointers in the group clauses list. But the question is about
equal SortGroupClauses comprising different pointers. I think we
should group duplicate SortGroupClauses together as
preprocess_groupclause() used to do. I reimplemented the patch to do so.
0003 Rename PathKeyInfo to GroupByOrdering by Andrei [3]. I only
revised comments and wrote the commit message.
0004 Turn back preprocess_groupclause() for the reason I described upthread [4].

Any thoughts?

Links.
1. /messages/by-id/17037754-f187-4138-8285-0e2bfebd0dea@postgrespro.ru
2. /messages/by-id/a663f0f6-cbf6-49aa-af2e-234dc6768a07@postgrespro.ru
3. /messages/by-id/db0fc3a4-966c-4cec-a136-94024d39212d@postgrespro.ru
4. /messages/by-id/CAPpHfdvyWLMGwvxaf=7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg@mail.gmail.com

------
Regards,
Alexander Korotkov
Supabase

Attachments:

v2-0003-Rename-PathKeyInfo-to-GroupByOrdering.patch (application/octet-stream)
From 67c4ab916acf94a136179107f6d8a84834758445 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 27 May 2024 14:57:43 +0300
Subject: [PATCH v2 3/4] Rename PathKeyInfo to GroupByOrdering

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
The PathKeyInfo data structure was used to store the particular ordering of
group-by pathkeys and corresponding clauses.  It turns out that PathKeyInfo
is not the best name for that purpose.  This commit renames this data structure
to GroupByOrdering, and revises its comment.

Discussion: https://postgr.es/m/db0fc3a4-966c-4cec-a136-94024d39212d%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 18 +++++++++---------
 src/backend/optimizer/plan/planner.c  |  8 ++++----
 src/include/nodes/pathnodes.h         | 13 ++++++++++---
 src/tools/pgindent/typedefs.list      |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index e8ff2297697..85bfa080680 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -462,7 +462,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -471,7 +471,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -483,7 +483,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -498,13 +498,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
-	PathKeyInfo *info;
+	GroupByOrdering *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -540,7 +540,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -565,7 +565,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -575,7 +575,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell   *lc;
 
 		/* Test consistency of info structures */
@@ -584,7 +584,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			ListCell   *lc1,
 					   *lc2;
 
-			info = lfirst_node(PathKeyInfo, lc);
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e723f72db64..5739275aa70 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6917,7 +6917,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6998,7 +6998,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7249,7 +7249,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7305,7 +7305,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..78489398294 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1468,14 +1468,21 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and the corresponding list of
+ * pathkeys.
+ *
+ * The elements of 'clauses' list should have the same order as the head of
+ * 'pathkeys' list.  The tleSortGroupRef of the clause should be equal to
+ * ec_sortref of the pathkey equivalence class.  If there are redundant
+ * clauses with the same tleSortGroupRef, they must be grouped together.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d427a1c16a5..4f57078d133 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1056,6 +1056,7 @@ GrantRoleStmt
 GrantStmt
 GrantTargetType
 Group
+GroupByOrdering
 GroupClause
 GroupPath
 GroupPathExtraData
@@ -2067,7 +2068,6 @@ PathClauseUsage
 PathCostComparison
 PathHashStack
 PathKey
-PathKeyInfo
 PathKeysComparison
 PathTarget
 PatternInfo
-- 
2.39.3 (Apple Git-145)

Attachment: v2-0004-Restore-preprocess_groupclause.patch (application/octet-stream)
From e182d34d9128b9628bcb2c4b2df3868b65ae241b Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:51:31 +0300
Subject: [PATCH v2 4/4] Restore preprocess_groupclause()

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
It eliminated preprocess_groupclause(), which was intended to match items
between GROUP BY and ORDER BY.  Instead, get_useful_group_keys_orderings()
function generates orderings of GROUP BY elements at the time of grouping
paths generation.  The get_useful_group_keys_orderings() function takes into
account 3 orderings of GROUP BY pathkeys and clauses: original order as written
in GROUP BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible.  Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY clauses
we don't need to consider it apart from ordering matching ORDER BY clauses.

This commit restores preprocess_groupclause() to provide an ordering of
GROUP BY elements matching ORDER BY before generation of paths.  The new
version of preprocess_groupclause() takes into account an incremental sort.
The get_useful_group_keys_orderings() function now takes into account 2 orderings
of GROUP BY elements: the order generated by preprocess_groupclause() and the order
matching the input path as much as possible.

Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com
Author: Alexander Korotkov
---
 src/backend/optimizer/path/pathkeys.c         |  55 +--------
 src/backend/optimizer/plan/planner.c          | 108 +++++++++++++++---
 src/include/nodes/pathnodes.h                 |   6 +-
 .../regress/expected/partition_aggregate.out  |   6 +-
 4 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 85bfa080680..301a4b1ba99 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -459,26 +459,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * pathkeys_are_duplicate
- *		Check if give pathkeys are already contained the list of
- *		GroupByOrdering's.
- */
-static bool
-pathkeys_are_duplicate(List *infos, List *pathkeys)
-{
-	ListCell   *lc;
-
-	foreach(lc, infos)
-	{
-		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
-
-		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
-			return true;
-	}
-	return false;
-}
-
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -487,11 +467,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
- * The function considers (and keeps) multiple GROUP BY orderings:
+ * The function considers (and keeps) the following GROUP BY orderings:
  *
- * - the original ordering, as specified by the GROUP BY clause,
- * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys as ordered by preprocess_groupclause() to match target
+ *   ORDER BY clause (as much as possible),
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -538,32 +518,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
-		{
-			info = makeNode(GroupByOrdering);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
-			infos = lappend(infos, info);
-		}
-	}
-
-	/*
-	 * Try reordering pathkeys to minimize the sort cost (this time consider
-	 * the ORDER BY clause).
-	 */
-	if (root->sort_pathkeys &&
-		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
-	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
-										   &clauses,
-										   root->num_groupby_pathkeys);
-
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5739275aa70..c4306b9b9ba 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = list_copy(parse->groupClause);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * groupclause_apply_groupingset
- *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
- *		not in the grouping set are skipped.
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ * We also consider partial match between GROUP BY and ORDER BY elements,
+ * which could allow to implement ORDER BY using the incremental sort.
+ *
+ * We also consider other orderings of the GROUP BY elements, which could
+ * match the sort ordering of other possible plans (eg an indexscan) and
+ * thereby reduce cost.  This is implemented during the generation of grouping
+ * paths.  See get_useful_group_keys_orderings() for details.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
+ *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
  */
 static List *
-groupclause_apply_groupingset(PlannerInfo *root, List *gset)
+preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
 	ListCell   *sl;
+	ListCell   *gl;
 
-	foreach(sl, gset)
+	/* For grouping sets, we need to force the ordering */
+	if (force)
 	{
-		Index		ref = lfirst_int(sl);
-		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		foreach(sl, force)
+		{
+			Index		ref = lfirst_int(sl);
+			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+			new_groupclause = lappend(new_groupclause, cl);
+		}
 
-		new_groupclause = lappend(new_groupclause, cl);
+		return new_groupclause;
 	}
+
+	/* If no ORDER BY, nothing useful to do here */
+	if (parse->sortClause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Add any remaining GROUP BY items to the new list.  We don't require a
+	 * complete match, because even partial match allows ORDER BY to be
+	 * implemented using incremental sort.  Also, give up if there are any
+	 * non-sortable GROUP BY items, since then there's no hope anyway.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = groupclause_apply_groupingset(root, gset);
+				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 78489398294..af8de4213de 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,11 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.
+	 * nonempty processed_groupClause.  The optimizer chooses a specific order
+	 * of group-by clauses during upper path generation, attempting different
+	 * strategies to minimize the number of sorts or to engage
+	 * incremental sort.  See preprocess_groupclause() and
+	 * get_useful_group_keys_orderings() for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8e..5f2c0cf5786 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
    Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
    ->  Append
          ->  HashAggregate
-               Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
+               Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
                Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p1 pagg_tab_m
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
+               Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
                Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
+               Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
                Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
 (15 rows)
-- 
2.39.3 (Apple Git-145)
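
For readers following the thread without applying the patches, the prefix-matching reordering performed by the restored preprocess_groupclause() can be modeled in a few lines of Python (a simplified sketch of the algorithm, not the actual C code; clause identity is reduced to plain values, and sortability to an optional set):

```python
def preprocess_groupclause(group_clause, sort_clause, sortable=None):
    """Reorder group_clause so a maximal prefix matches sort_clause.

    group_clause: clause keys in the order written in GROUP BY
    sort_clause:  clause keys from ORDER BY (assumed duplicate-free)
    sortable:     keys having a valid sort operator; None means "all"
    """
    if not sort_clause:
        return list(group_clause)

    # Build the matching prefix by scanning ORDER BY items in order.
    new_gc = []
    for sc in sort_clause:
        if sc in group_clause:
            new_gc.append(sc)
        else:
            break  # no match, the common prefix ends here

    if not new_gc:
        return list(group_clause)  # no overlap at all, keep original order

    # Append the remaining GROUP BY items; a partial match is still useful
    # because ORDER BY can then be finished with an incremental sort.
    for gc in group_clause:
        if gc in new_gc:
            continue  # already placed by the ORDER BY prefix
        if sortable is not None and gc not in sortable:
            return list(group_clause)  # non-sortable key: give up
        new_gc.append(gc)

    assert len(new_gc) == len(group_clause)
    return new_gc
```

With GROUP BY b, a, c and ORDER BY a, b, d this yields the order a, b, c, so one sort serves both the grouping and (incrementally) the ordering.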

Attachment: v2-0002-Teach-group_keys_reorder_by_pathkeys-about-redund.patch (application/octet-stream)
From b5ea90e50faf8a7a8fc2329a35da5f66fbe3d35b Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:13:11 +0300
Subject: [PATCH v2 2/4] Teach group_keys_reorder_by_pathkeys() about redundant
 SortGroupClause's

We don't have strict guarantees that there shouldn't be redundant
SortGroupClause's.  Although there is no confirmed way to generate
duplicate SortGroupClause's, this commit teaches
group_keys_reorder_by_pathkeys() to pull all the SortGroupClause matching to
pathkey at once.

This commit also introduces checking invariants of generated orderings
in get_useful_group_keys_orderings for assert-enabled builds.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 69 +++++++++++++++++++++------
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 7858a56fae0..e8ff2297697 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -401,7 +401,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	foreach(lc, pathkeys)
 	{
 		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		bool		found = false;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -416,28 +416,40 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		/*
 		 * Since 1349d27 pathkey coming from underlying node can be in the
 		 * root->group_pathkeys but not in the processed_groupClause. So, we
-		 * should be careful here.
+		 * should be careful here.  Also, there could be redundant
+		 * SortGroupClause's.  So, we need to pull out all the matching
+		 * SortGroupClause's, but ensure there is at least one.
 		 */
-		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
-											*group_clauses);
-		if (!sgc)
-			/* The grouping clause does not cover this pathkey */
-			break;
+		foreach_ptr(SortGroupClause, sgc, *group_clauses)
+		{
+			if (sgc->tleSortGroupRef == pathkey->pk_eclass->ec_sortref)
+			{
+				/*
+				 * Sort group clause should have an ordering operator as long
+				 * as there is an associated pathkey.
+				 */
+				Assert(OidIsValid(sgc->sortop));
 
-		/*
-		 * Sort group clause should have an ordering operator as long as there
-		 * is an associated pathkey.
-		 */
-		Assert(OidIsValid(sgc->sortop));
+				new_group_clauses = lappend(new_group_clauses, sgc);
+				found = true;
+			}
+		}
+
+		/* Check if the grouping clause does not cover this pathkey */
+		if (!found)
+			break;
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-		new_group_clauses = lappend(new_group_clauses, sgc);
+
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
@@ -561,6 +573,35 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			ListCell   *lc1,
+					   *lc2;
+
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey    *pk = lfirst_node(PathKey, lc2);
+
+				if (pk->pk_eclass->ec_sortref != sgc->tleSortGroupRef)
+					elog(ERROR, "Order of group-by clauses doesn't correspond to incoming sort order");
+			}
+		}
+	}
+#endif
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)
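
The behavior change in group_keys_reorder_by_pathkeys(), pulling every SortGroupClause that matches a pathkey's ec_sortref rather than only the first one, can be sketched as follows (a hypothetical Python model, not the C code; pathkeys are reduced to their ec_sortref values and clauses to (id, tleSortGroupRef) pairs):

```python
def reorder_group_clauses_by_pathkeys(pathkeys, group_clauses):
    """Model of group_keys_reorder_by_pathkeys() after patch 0002.

    pathkeys:      ec_sortref values of the input path's pathkeys, in order
    group_clauses: (clause_id, tleSortGroupRef) pairs; several clauses may
                   share one tleSortGroupRef (redundant clauses)
    Returns (n, clauses): number of matched pathkeys, and the clause list
    reordered so matched clauses come first, grouped per pathkey.
    """
    new_clauses = []
    n = 0
    for sortref in pathkeys:
        # Pull *all* clauses matching this pathkey, not just the first one,
        # so redundant clauses with the same sortref stay grouped together.
        matched = [c for c in group_clauses if c[1] == sortref]
        if not matched:
            break  # the grouping clause does not cover this pathkey
        new_clauses.extend(matched)
        n += 1
    # Append the remaining clauses; they will be treated as not sorted.
    new_clauses.extend(c for c in group_clauses if c not in new_clauses)
    return n, new_clauses
```

With an input path sorted on sortrefs 10 then 20, and a redundant clause for 10, both matching clauses land in front, in pathkey order.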

Attachment: v2-0001-Fix-asymmetry-in-setting-EquivalenceClass.ec_sort.patch (application/octet-stream)
From 4e57d213a9619d04b527ee24810a6daf45fe028d Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:10:34 +0300
Subject: [PATCH v2 1/4] Fix asymmetry in setting EquivalenceClass.ec_sortref

0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the
EquivalenceClass.ec_sortref's needed to explore alternative GROUP BY ordering
specifically during turning GROUP BY clauses into pathkeys.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/equivclass.c  | 13 +------
 src/backend/optimizer/path/pathkeys.c    | 18 ++++++++-
 src/backend/optimizer/plan/planner.c     | 16 ++++++--
 src/include/optimizer/paths.h            |  3 +-
 src/test/regress/expected/aggregates.out | 47 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 14 +++++++
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..51d806326eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec;	/* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..7858a56fae0 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true, then the ec_sortref field of the pathkey's
+ * EquivalenceClass is set, if not initialized beforehand.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+		{
+			/*
+			 * Copy the sortref if it wasn't set yet.  That may happen if the
+			 * ec was constructed from a WHERE clause, i.e. it doesn't have a
+			 * target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+		}
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..e723f72db64 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		 */
 		bool		sortable;
 
+		/*
+		 * Convert group clauses into pathkeys.  Set the ec_sortref field of
+		 * the EquivalenceClass'es if it's not set yet.
+		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses_extended(root,
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5e88c0224a4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2442342e9d1..1c1ca7573ad 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 61a3424c845..1a18ca3d8fe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
-- 
2.39.3 (Apple Git-145)
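
The asymmetry that patch 0001 removes is easiest to see in a toy model: previously, whichever caller first looked up the equivalence class stamped its sortref onto it; now only the caller building GROUP BY pathkeys (set_ec_sortref=True) does, and only when the field is still unset. A hypothetical Python sketch of that behavior, with clause and eclass structures reduced to the bare minimum:

```python
class EquivalenceClass:
    """Toy stand-in for the planner's EquivalenceClass; 0 means 'unset'."""
    def __init__(self):
        self.ec_sortref = 0

def make_pathkeys_for_sortclauses_extended(eclasses, clauses, set_ec_sortref):
    """clauses: (tleSortGroupRef, expr_key) pairs; eclasses: expr_key -> EC.

    Mirrors the patched logic: the sortref is copied onto the EC only when
    the caller asked for it and the EC has no sortref yet (e.g. it was
    first built from a WHERE clause and has no target reference).
    """
    pathkeys = []
    for sortref, key in clauses:
        ec = eclasses.setdefault(key, EquivalenceClass())
        if ec.ec_sortref == 0 and set_ec_sortref:
            ec.ec_sortref = sortref
        pathkeys.append(ec)
    return pathkeys
```

A lookup with set_ec_sortref=False (the DISTINCT, set-op, and window cases) leaves ec_sortref untouched; only the GROUP BY path fills it in, so the result no longer depends on which caller happened to run first.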

#164 Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#163)

On 5/27/24 19:41, Alexander Korotkov wrote:

On Tue, Apr 23, 2024 at 1:19 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

While there are some particular use-cases by Jian He, I hope that
above could give some rationale.

I've assembled patches in this thread into one patchset.
0001 The patch fixing asymmetry in setting EquivalenceClass.ec_sortref
by Andrei [1]. I've revised comments and wrote the commit message.
0002 The patch for handling duplicates of SortGroupClause. I didn't
get the sense of Andrei's implementation. It seems to care about
duplicate pointers in the group clauses list. But the question is
about equal SortGroupClause's held by different pointers. I think we
should group duplicate SortGroupClause's together as
preprocess_groupclause() used to do. Reimplemented patch to do so.
0003 Rename PathKeyInfo to GroupByOrdering by Andres [3]. I only
revised comments and wrote the commit message.
0004 Turn back preprocess_groupclause() for the reason I described upthread [4].

Any thoughts?

About 0001:
Having looked it over, I don't see any issues (though I'm the author),
except grammatical ones - but I'm not a native speaker, so I can't judge.
Also, the sentence 'turning GROUP BY clauses into pathkeys' is unclear
to me. It may be better to write something like: 'building pathkeys by
the list of grouping clauses'.

0002:
The part under USE_ASSERT_CHECKING looks good to me. But the code in
group_keys_reorder_by_pathkeys looks suspicious: of course, we do some
speculative work without any known way to reproduce the case, but if we
envision duplicated elements in the group_clauses, we should avoid using
list_concat_unique_ptr. What's more, why don't you exit the foreach_ptr
loop immediately after the SortGroupClause has been found? I think the
new_group_clauses should be consistent with the new_group_pathkeys.
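The prefix-matching behaviour of group_keys_reorder_by_pathkeys() discussed here can be sketched as follows. This is a deliberately simplified model: pathkeys and clauses are plain values matched by equality, whereas the real code works on PathKey and SortGroupClause nodes linked by ec_sortref; the shape and names are illustrative only.

```python
def reorder_group_keys_by_pathkeys(pathkeys, group_pathkeys, group_clauses):
    """Reorder group_pathkeys/group_clauses so a prefix matches the given
    pathkeys (e.g. the ordering of an input path).  Returns the reordered
    lists and the length of the matching prefix."""
    new_pathkeys, new_clauses = [], []
    for pk in pathkeys:
        if pk not in group_pathkeys:
            break               # input ordering goes beyond GROUP BY keys
        if pk not in group_clauses:
            break               # no grouping clause covers this pathkey
        new_pathkeys.append(pk)
        new_clauses.append(pk)
    n = len(new_pathkeys)
    # append the remaining keys/clauses; they will be treated as unsorted
    new_pathkeys += [k for k in group_pathkeys if k not in new_pathkeys]
    new_clauses += [c for c in group_clauses if c not in new_clauses]
    return new_pathkeys, new_clauses, n

# A path sorted by (b, a) lets GROUP BY a, b, c reuse that two-key prefix:
print(reorder_group_keys_by_pathkeys(["b", "a"], ["a", "b", "c"],
                                     ["a", "b", "c"]))
# (['b', 'a', 'c'], ['b', 'a', 'c'], 2)
```

The question above is about what happens inside the inner matching step when group_clauses could contain equal elements, which this model sidesteps by assuming uniqueness.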

0003:
Looks good

0004:
I was also thinking about reintroducing the preprocess_groupclause
because, with the re-arrangement of GROUP-BY clauses according to
incoming pathkeys, it doesn't make sense to preserve a user-defined
order, at least while cost_sort doesn't differentiate costs between
alternative column orderings.
So, I'm okay with the code. But why don't you use the same approach with
foreach_ptr as before?

--
regards,
Andrei Lepikhov
Postgres Professional

#165Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#164)
Re: POC: GROUP BY optimization

Hi, Andrei!

Thank you for your feedback.

On Wed, May 29, 2024 at 11:08 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 5/27/24 19:41, Alexander Korotkov wrote:

On Tue, Apr 23, 2024 at 1:19 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

While there are some particular use-cases by Jian He, I hope that
the above could give some rationale.

I've assembled patches in this thread into one patchset.
0001 The patch fixing asymmetry in setting EquivalenceClass.ec_sortref
by Andrei [1]. I've revised comments and wrote the commit message.
0002 The patch for handling duplicates of SortGroupClause. I didn't
get the sense of Andrei's implementation. It seems to care about
duplicate pointers in group clauses list. But the question is the
equal SortGroupClause's comprising different pointers. I think we
should group duplicate SortGroupClause's together as
preprocess_groupclause() used to do. Reimplemented patch to do so.
0003 Rename PathKeyInfo to GroupByOrdering by Andres [3]. I only
revised comments and wrote the commit message.
0004 Turn back preprocess_groupclause() for the reason I described upthread [4].

Any thoughts?

About 0001:
Having looked it over, I don't see any issues (though I'm the author),
except grammatical ones - but I'm not a native speaker, so I can't judge.
Also, the sentence 'turning GROUP BY clauses into pathkeys' is unclear
to me. It may be better to write something like: 'building pathkeys by
the list of grouping clauses'.

OK, thank you. I'll run once again for the grammar issues.

0002:
The part under USE_ASSERT_CHECKING looks good to me. But the code in
group_keys_reorder_by_pathkeys looks suspicious: of course, we do some
speculative work without any known way to reproduce the case, but if we
envision duplicated elements in the group_clauses, we should avoid using
list_concat_unique_ptr.

As I understand Tom, there is a risk that clauses list may contain
multiple instances of equivalent SortGroupClause, not duplicate
pointers.

What's more, why don't you exit the foreach_ptr loop immediately after
the SortGroupClause has been found? I think the new_group_clauses
should be consistent with the new_group_pathkeys.

I wanted this to be consistent with preprocess_groupclause(), where
duplicate SortGroupClause'es are grouped together. Otherwise, we
could just delete redundant SortGroupClause'es.
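The distinction being made here, keeping duplicates but grouping equal items adjacently rather than deleting them, can be illustrated with a toy model. Clauses are modelled as plain strings instead of SortGroupClause nodes; this is purely illustrative, not the planner's code.

```python
def group_duplicates_together(clauses):
    """Reorder clauses so that equal items end up adjacent, preserving
    first-appearance order (the behaviour preprocess_groupclause()
    effectively had for duplicated grouping items)."""
    out = []
    for c in clauses:
        if c in out:
            # insert right after the last equal element already placed
            out.insert(len(out) - out[::-1].index(c), c)
        else:
            out.append(c)
    return out

print(group_duplicates_together(["a", "b", "a", "c", "b"]))
# ['a', 'a', 'b', 'b', 'c']
```

Note that every input element survives; deleting redundant clauses instead would shrink the list, which is the alternative mentioned above.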

0003:
Looks good

0004:
I was also thinking about reintroducing the preprocess_groupclause
because, with the re-arrangement of GROUP-BY clauses according to
incoming pathkeys, it doesn't make sense to preserve a user-defined
order, at least while cost_sort doesn't differentiate costs between
alternative column orderings.
So, I'm okay with the code. But why don't you use the same approach with
foreach_ptr as before?

I restored the function as it was before 0452b461bc with minimal edits
to support the incremental sort. I think it would be more valuable to
keep the difference with pg16 code small rather than refactor to
simplify existing code.
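The restored logic roughly does the following, sketched here in Python with clauses modelled as strings (an illustration of the pre-0452b461bc algorithm with the partial-match relaxation for incremental sort, not the real data structures):

```python
def preprocess_groupclause(group_clause, sort_clause):
    """Reorder GROUP BY items so a prefix matches ORDER BY, allowing a
    single (possibly incremental) sort to serve both clauses."""
    if not sort_clause:
        return list(group_clause)
    new = []
    for sc in sort_clause:
        if sc in group_clause:
            new.append(sc)
        else:
            break               # only a matching prefix is useful
    if not new:
        return list(group_clause)   # no match at all, keep original order
    # A partial match is fine: ORDER BY can finish with an incremental
    # sort on top of the grouped output.
    new += [gc for gc in group_clause if gc not in new]
    return new

print(preprocess_groupclause(["b", "a", "c"], ["a", "b", "d"]))
# ['a', 'b', 'c']
```

In the example, the (a, b) prefix matches ORDER BY, so sorting by (a, b, c) serves GROUP BY while ORDER BY a, b, d only needs an incremental sort on d.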

------
Regards,
Alexander Korotkov

#166Andrei Lepikhov
a.lepikhov@postgrespro.ru
In reply to: Alexander Korotkov (#165)
Re: POC: GROUP BY optimization

On 5/29/24 19:53, Alexander Korotkov wrote:

Hi, Andrei!

Thank you for your feedback.

On Wed, May 29, 2024 at 11:08 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 5/27/24 19:41, Alexander Korotkov wrote:

Any thoughts?

About 0001:
Having looked it over, I don't see any issues (though I'm the author),
except grammatical ones - but I'm not a native speaker, so I can't judge.
Also, the sentence 'turning GROUP BY clauses into pathkeys' is unclear
to me. It may be better to write something like: 'building pathkeys by
the list of grouping clauses'.

OK, thank you. I'll run once again for the grammar issues.

0002:
The part under USE_ASSERT_CHECKING looks good to me. But the code in
group_keys_reorder_by_pathkeys looks suspicious: of course, we do some
speculative work without any known way to reproduce the case, but if we
envision duplicated elements in the group_clauses, we should avoid using
list_concat_unique_ptr.

As I understand Tom, there is a risk that clauses list may contain
multiple instances of equivalent SortGroupClause, not duplicate
pointers.

Maybe. I just implemented the worst-case scenario with the intention of
maximum safety. So, it's up to you.

What's more, why don't you exit the foreach_ptr loop immediately after
the SortGroupClause has been found? I think the new_group_clauses
should be consistent with the new_group_pathkeys.

I wanted this to be consistent with preprocess_groupclause(), where
duplicate SortGroupClause'es are grouped together. Otherwise, we
could just delete redundant SortGroupClause'es.

Hm, preprocess_groupclause is called before the standard_qp_callback
forms the 'canonical form' of group_pathkeys, and such behaviour is
needed there. But the code that chooses the reordering strategy uses
already-processed grouping clauses, where we don't have duplicates in
the first num_groupby_pathkeys elements, do we?

0004:
I was also thinking about reintroducing the preprocess_groupclause
because, with the re-arrangement of GROUP-BY clauses according to
incoming pathkeys, it doesn't make sense to preserve a user-defined
order, at least while cost_sort doesn't differentiate costs between
alternative column orderings.
So, I'm okay with the code. But why don't you use the same approach with
foreach_ptr as before?

I restored the function as it was before 0452b461bc with minimal edits
to support the incremental sort. I think it would be more valuable to
keep the difference with pg16 code small rather than refactor to
simplify existing code.

Got it

--
regards,
Andrei Lepikhov
Postgres Professional

#167Alexander Korotkov
aekorotkov@gmail.com
In reply to: Andrei Lepikhov (#166)
5 attachment(s)
Re: POC: GROUP BY optimization

Hi!

On Thu, May 30, 2024 at 7:22 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 5/29/24 19:53, Alexander Korotkov wrote:

Thank you for your feedback.

On Wed, May 29, 2024 at 11:08 AM Andrei Lepikhov
<a.lepikhov@postgrespro.ru> wrote:

On 5/27/24 19:41, Alexander Korotkov wrote:

Any thoughts?

About 0001:
Having looked it over, I don't see any issues (though I'm the author),
except grammatical ones - but I'm not a native speaker, so I can't judge.
Also, the sentence 'turning GROUP BY clauses into pathkeys' is unclear
to me. It may be better to write something like: 'building pathkeys by
the list of grouping clauses'.

OK, thank you. I'll run once again for the grammar issues.

I've revised some grammar including the sentence you've proposed.

0002:
The part under USE_ASSERT_CHECKING looks good to me. But the code in
group_keys_reorder_by_pathkeys looks suspicious: of course, we do some
speculative work without any known way to reproduce the case, but if we
envision duplicated elements in the group_clauses, we should avoid using
list_concat_unique_ptr.

As I understand Tom, there is a risk that clauses list may contain
multiple instances of equivalent SortGroupClause, not duplicate
pointers.

Maybe. I just implemented the worst-case scenario with the intention of
maximum safety. So, it's up to you.

What's more, why don't you exit the foreach_ptr loop immediately after
the SortGroupClause has been found? I think the new_group_clauses
should be consistent with the new_group_pathkeys.

I wanted this to be consistent with preprocess_groupclause(), where
duplicate SortGroupClause'es are grouped together. Otherwise, we
could just delete redundant SortGroupClause'es.

Hm, preprocess_groupclause is called before the standard_qp_callback
forms the 'canonical form' of group_pathkeys, and such behaviour is
needed there. But the code that chooses the reordering strategy uses
already-processed grouping clauses, where we don't have duplicates in
the first num_groupby_pathkeys elements, do we?

Yep, it seems that we work with group clauses which were processed by
standard_qp_callback(). In turn standard_qp_callback() called
make_pathkeys_for_sortclauses_extended() with remove_redundant ==
true.  So, there shouldn't be duplicates, and it's unclear what we
should be protected from.
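The redundancy removal referred to here can be sketched as follows. Equivalence classes are reduced to plain keys and the function name mirrors, but greatly simplifies, make_pathkeys_for_sortclauses_extended(); this is an illustration of the remove_redundant behaviour, not the actual implementation.

```python
def make_pathkeys(sort_clauses, remove_redundant=True):
    """Build a pathkey list from sort/group clauses.  With
    remove_redundant, a clause whose equivalence class (here: the value
    itself) already produced a pathkey contributes nothing, so the
    resulting canonical list contains no duplicates."""
    pathkeys, seen = [], set()
    for c in sort_clauses:
        if remove_redundant and c in seen:
            continue
        seen.add(c)
        pathkeys.append(c)
    return pathkeys

print(make_pathkeys(["a", "b", "a", "c"]))
# ['a', 'b', 'c']
```

With redundancy removed at this stage, the later reordering code never sees duplicate entries among the canonical group pathkeys, which is the point made above.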

I left 0002 with just the asserts, and extracted the questionable changes into 0005.

------
Regards,
Alexander Korotkov
Supabase

Attachments:

v3-0004-Restore-preprocess_groupclause.patch (application/octet-stream)
From c1b657dbb5cbdb19004ba6ab93757f1762d305c2 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:51:31 +0300
Subject: [PATCH v3 4/5] Restore preprocess_groupclause()

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
It eliminated preprocess_groupclause(), which was intended to match items
between GROUP BY and ORDER BY.  Instead, get_useful_group_keys_orderings()
function generates orderings of GROUP BY elements at the time of grouping
paths generation.  The get_useful_group_keys_orderings() function takes into
account 3 orderings of GROUP BY pathkeys and clauses: original order as written
in GROUP BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible.  Given that, even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY clauses,
we don't need to consider it separately from the ordering matching ORDER BY
clauses.

This commit restores preprocess_groupclause() to provide an ordering of
GROUP BY elements matching ORDER BY before generation of paths.  The new
version of preprocess_groupclause() takes into account an incremental sort.
The get_useful_group_keys_orderings() function now takes into account 2
orderings of GROUP BY elements: the order generated by preprocess_groupclause()
and the order
matching the input path as much as possible.

Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com
Author: Alexander Korotkov
---
 src/backend/optimizer/path/pathkeys.c         |  55 +--------
 src/backend/optimizer/plan/planner.c          | 108 +++++++++++++++---
 src/include/nodes/pathnodes.h                 |   6 +-
 .../regress/expected/partition_aggregate.out  |   6 +-
 4 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1cc8b5da49a..4287811abe4 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * pathkeys_are_duplicate
- *		Check if give pathkeys are already contained the list of
- *		GroupByOrdering's.
- */
-static bool
-pathkeys_are_duplicate(List *infos, List *pathkeys)
-{
-	ListCell   *lc;
-
-	foreach(lc, infos)
-	{
-		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
-
-		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
-			return true;
-	}
-	return false;
-}
-
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
- * The function considers (and keeps) multiple GROUP BY orderings:
+ * The function considers (and keeps) the following GROUP BY orderings:
  *
- * - the original ordering, as specified by the GROUP BY clause,
- * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys as ordered by preprocess_groupclause() to match target
+ *   ORDER BY clause (as much as possible),
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
-		{
-			info = makeNode(GroupByOrdering);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
-			infos = lappend(infos, info);
-		}
-	}
-
-	/*
-	 * Try reordering pathkeys to minimize the sort cost (this time consider
-	 * the ORDER BY clause).
-	 */
-	if (root->sort_pathkeys &&
-		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
-	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
-										   &clauses,
-										   root->num_groupby_pathkeys);
-
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1eeedc121ed..d6a1fb8e7d5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = list_copy(parse->groupClause);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * groupclause_apply_groupingset
- *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
- *		not in the grouping set are skipped.
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ * We also consider partial match between GROUP BY and ORDER BY elements,
+ * which could allow to implement ORDER BY using the incremental sort.
+ *
+ * We also consider other orderings of the GROUP BY elements, which could
+ * match the sort ordering of other possible plans (eg an indexscan) and
+ * thereby reduce cost.  This is implemented during the generation of grouping
+ * paths.  See get_useful_group_keys_orderings() for details.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
+ *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
  */
 static List *
-groupclause_apply_groupingset(PlannerInfo *root, List *gset)
+preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
 	ListCell   *sl;
+	ListCell   *gl;
 
-	foreach(sl, gset)
+	/* For grouping sets, we need to force the ordering */
+	if (force)
 	{
-		Index		ref = lfirst_int(sl);
-		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		foreach(sl, force)
+		{
+			Index		ref = lfirst_int(sl);
+			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+			new_groupclause = lappend(new_groupclause, cl);
+		}
 
-		new_groupclause = lappend(new_groupclause, cl);
+		return new_groupclause;
 	}
+
+	/* If no ORDER BY, nothing useful to do here */
+	if (parse->sortClause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Add any remaining GROUP BY items to the new list.  We don't require a
+	 * complete match, because even partial match allows ORDER BY to be
+	 * implemented using incremental sort.  Also, give up if there are any
+	 * non-sortable GROUP BY items, since then there's no hope anyway.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = groupclause_apply_groupingset(root, gset);
+				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 78489398294..af8de4213de 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,11 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.
+	 * nonempty processed_groupClause.  The optimizer chooses a specific
+	 * order of group-by clauses during generation of the upper paths,
+	 * attempting different strategies to minimize the number of sorts or
+	 * to engage incremental sort.  See preprocess_groupclause() and
+	 * get_useful_group_keys_orderings() for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8e..5f2c0cf5786 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
    Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
    ->  Append
          ->  HashAggregate
-               Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
+               Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
                Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p1 pagg_tab_m
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
+               Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
                Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
+               Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
                Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
 (15 rows)
-- 
2.39.3 (Apple Git-145)

v3-0005-Teach-group_keys_reorder_by_pathkeys-about-redund.patch (application/octet-stream)
From d76a85b614f5d27c603af812799104a006076cbe Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:13:11 +0300
Subject: [PATCH v3 5/5] Teach group_keys_reorder_by_pathkeys() about redundant
 SortGroupClause's

We don't have strict guarantees that there shouldn't be redundant
SortGroupClause's.  Although there is no confirmed way to generate
duplicate SortGroupClause's, this commit teaches
group_keys_reorder_by_pathkeys() to pull all the SortGroupClauses matching a
pathkey at once.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 41 ++++++++++++++++++---------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4287811abe4..6ae595781d8 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -401,7 +401,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	foreach(lc, pathkeys)
 	{
 		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		bool		found = false;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -416,28 +416,40 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		/*
 		 * Since 1349d27 pathkey coming from underlying node can be in the
 		 * root->group_pathkeys but not in the processed_groupClause. So, we
-		 * should be careful here.
+		 * should be careful here.  Also, there could be redundant
+		 * SortGroupClause's.  So, we need to pull all the matching
+		 * SortGroupClause's, but ensure there is at least one.
 		 */
-		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
-											*group_clauses);
-		if (!sgc)
-			/* The grouping clause does not cover this pathkey */
-			break;
+		foreach_ptr(SortGroupClause, sgc, *group_clauses)
+		{
+			if (sgc->tleSortGroupRef == pathkey->pk_eclass->ec_sortref)
+			{
+				/*
+				 * Sort group clause should have an ordering operator as long
+				 * as there is an associated pathkey.
+				 */
+				Assert(OidIsValid(sgc->sortop));
 
-		/*
-		 * Sort group clause should have an ordering operator as long as there
-		 * is an associated pathkey.
-		 */
-		Assert(OidIsValid(sgc->sortop));
+				new_group_clauses = lappend(new_group_clauses, sgc);
+				found = true;
+			}
+		}
+
+		/* Check if the grouping clause does not cover this pathkey */
+		if (!found)
+			break;
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-		new_group_clauses = lappend(new_group_clauses, sgc);
+
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
@@ -545,6 +557,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 #endif
+
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)

v3-0002-Add-invariants-check-to-get_useful_group_keys_ord.patch (application/octet-stream)
From b77abf6c882778321ca10a63b11a5d789518fc01 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 30 May 2024 22:38:53 +0300
Subject: [PATCH v3 2/5] Add invariants check to
 get_useful_group_keys_orderings()

This commit introduces invariants checking of generated orderings
in get_useful_group_keys_orderings for assert-enabled builds.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 29 +++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 837e612121f..4e13fdcb23c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -561,6 +561,35 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			ListCell   *lc1,
+					   *lc2;
+
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey    *pk = lfirst_node(PathKey, lc2);
+
+				if (pk->pk_eclass->ec_sortref != sgc->tleSortGroupRef)
+					elog(ERROR, "Order of group-by clauses doesn't correspond to incoming sort order");
+			}
+		}
+	}
+#endif
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)

v3-0003-Rename-PathKeyInfo-to-GroupByOrdering.patch (application/octet-stream)
From c805a94738e51d5ee21ad51fe1ed52c595e99758 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 27 May 2024 14:57:43 +0300
Subject: [PATCH v3 3/5] Rename PathKeyInfo to GroupByOrdering

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
The PathKeyInfo data structure was used to store the particular ordering of
group-by pathkeys and corresponding clauses.  It turns out that PathKeyInfo
is not the best name for that purpose.  This commit renames this data structure
to GroupByOrdering, and revises its comment.

Discussion: https://postgr.es/m/db0fc3a4-966c-4cec-a136-94024d39212d%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 18 +++++++++---------
 src/backend/optimizer/plan/planner.c  |  8 ++++----
 src/include/nodes/pathnodes.h         | 13 ++++++++++---
 src/tools/pgindent/typedefs.list      |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4e13fdcb23c..1cc8b5da49a 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -450,7 +450,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -459,7 +459,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -471,7 +471,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -486,13 +486,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
-	PathKeyInfo *info;
+	GroupByOrdering *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -528,7 +528,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -553,7 +553,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -563,7 +563,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell   *lc;
 
 		/* Test consistency of info structures */
@@ -572,7 +572,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			ListCell   *lc1,
 					   *lc2;
 
-			info = lfirst_node(PathKeyInfo, lc);
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 669903b9059..1eeedc121ed 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6917,7 +6917,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6998,7 +6998,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7249,7 +7249,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7305,7 +7305,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..78489398294 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1468,14 +1468,21 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and the corresponding list of
+ * pathkeys.
+ *
+ * The elements of 'clauses' list should have the same order as the head of
+ * 'pathkeys' list.  The tleSortGroupRef of the clause should be equal to
+ * ec_sortref of the pathkey equivalence class.  If there are redundant
+ * clauses with the same tleSortGroupRef, they must be grouped together.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d427a1c16a5..4f57078d133 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1056,6 +1056,7 @@ GrantRoleStmt
 GrantStmt
 GrantTargetType
 Group
+GroupByOrdering
 GroupClause
 GroupPath
 GroupPathExtraData
@@ -2067,7 +2068,6 @@ PathClauseUsage
 PathCostComparison
 PathHashStack
 PathKey
-PathKeyInfo
 PathKeysComparison
 PathTarget
 PatternInfo
-- 
2.39.3 (Apple Git-145)

Attachment: v3-0001-Fix-asymmetry-in-setting-EquivalenceClass.ec_sort.patch (application/octet-stream)
From eef9b88438eb4c3cc40d98f4367a5c6b4ec4a920 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:10:34 +0300
Subject: [PATCH v3 1/5] Fix asymmetry in setting EquivalenceClass.ec_sortref

0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the
EquivalenceClass.ec_sortref's needed to explore alternative GROUP BY ordering
specifically during building pathkeys by the list of grouping clauses.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/equivclass.c  | 13 +------
 src/backend/optimizer/path/pathkeys.c    | 18 ++++++++-
 src/backend/optimizer/plan/planner.c     | 16 ++++++--
 src/include/optimizer/paths.h            |  3 +-
 src/test/regress/expected/aggregates.out | 47 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 14 +++++++
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..51d806326eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec;	/* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..837e612121f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true then sets the uninitialized value of ec_sortref
+ * of the pathkey's EquivalenceClass.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+		{
+			/*
+			 * Copy the sortref if it wasn't set yet.  That may happen if the
+			 * EquivalenceClass was constructed from a WHERE clause, i.e. it
+			 * doesn't have a target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+		}
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..669903b9059 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		 */
 		bool		sortable;
 
+		/*
+		 * Covert group clauses into pathkeys.  Set the ec_sortref field of
+		 * EquivalenceClass'es if it's not set yet.
+		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses_extended(root,
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5e88c0224a4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2442342e9d1..1c1ca7573ad 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 61a3424c845..1a18ca3d8fe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
-- 
2.39.3 (Apple Git-145)

#168 jian he
jian.universality@gmail.com
In reply to: Alexander Korotkov (#167)
1 attachment(s)
Re: POC: GROUP BY optimization

On Fri, May 31, 2024 at 8:12 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

I've revised some grammar including the sentence you've proposed.

-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);

Changing preprocess_groupclause's second argument
from "force" to "gset" would be more intuitive, I think.

`elog(ERROR, "Order of group-by clauses doesn't correspond incoming
sort order");`

I think this error message makes people wonder what "incoming sort order" means.
BTW, with "correspond", people generally say "correspond to".

I made some minor cosmetic changes, mainly converting foreach to foreach_node.
Please check the attachment.

Attachments:

Attachment: minor_change_on_v3_Restore-preprocess_groupclause.diff (text/x-patch)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6a1fb8e..801d9f6d 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2852,15 +2852,13 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
-	ListCell   *sl;
-	ListCell   *gl;
+	ListCell	*gl;
 
 	/* For grouping sets, we need to force the ordering */
-	if (force)
+	if (force != NIL)
 	{
-		foreach(sl, force)
+		foreach_int(ref, force)
 		{
-			Index		ref = lfirst_int(sl);
 			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
 
 			new_groupclause = lappend(new_groupclause, cl);
@@ -2879,10 +2877,8 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 	 *
 	 * This code assumes that the sortClause contains no duplicate items.
 	 */
-	foreach(sl, parse->sortClause)
+	foreach_node(SortGroupClause, sc, parse->sortClause)
 	{
-		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
-
 		foreach(gl, parse->groupClause)
 		{
 			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
@@ -2908,10 +2904,8 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 	 * implemented using incremental sort.  Also, give up if there are any
 	 * non-sortable GROUP BY items, since then there's no hope anyway.
 	 */
-	foreach(gl, parse->groupClause)
+	foreach_node(SortGroupClause,gc, parse->groupClause)
 	{
-		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
-
 		if (list_member_ptr(new_groupclause, gc))
 			continue;			/* it matched an ORDER BY item */
 		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index af8de421..2ba297c1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,7 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.  Optimiser chooses specific order of
+	 * nonempty processed_groupClause.  Optimizer chooses specific order of
 	 * group-by clauses during the upper paths generation process, attempting
 	 * to use different strategies to minimize number of sorts or engage
 	 * incremental sort.  See preprocess_groupclause() and
#169 Alexander Korotkov
aekorotkov@gmail.com
In reply to: jian he (#168)
5 attachment(s)
Re: POC: GROUP BY optimization

Hi!

On Sun, Jun 2, 2024 at 10:55 AM jian he <jian.universality@gmail.com> wrote:

On Fri, May 31, 2024 at 8:12 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

I've revised some grammar including the sentence you've proposed.

-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);

Changing preprocess_groupclause's second argument
from "force" to "gset" would be more intuitive, I think.

Probably, but my intention is to restore preprocess_groupclause() as
it was before 0452b461bc with minimal edits to support incremental
sort. I'd rather avoid refactoring of this area for now.

`elog(ERROR, "Order of group-by clauses doesn't correspond incoming
sort order");`

I think this error message makes people wonder what "incoming sort order" means.
BTW, with "correspond", people generally say "correspond to".

Thank you. On second thought, I think it would be better to turn
this into an assertion like the preceding checks.

I did some minor cosmetic changes, mainly changing foreach to foreach_node.
Please check the attachment.

I would avoid refactoring of preprocess_groupclause() for the reason
described above. But I picked the grammar fix for PlannerInfo's
comment.

------
Regards,
Alexander Korotkov
Supabase

Attachments:

Attachment: v4-0003-Rename-PathKeyInfo-to-GroupByOrdering.patch (application/octet-stream)
From 74734378e209cf37124cb12b7d80717d49d56d42 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 27 May 2024 14:57:43 +0300
Subject: [PATCH v4 3/5] Rename PathKeyInfo to GroupByOrdering

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
The PathKeyInfo data structure was used to store the particular ordering of
group-by pathkeys and corresponding clauses.  It turns out that PathKeyInfo
is not the best name for that purpose.  This commit renames this data structure
to GroupByOrdering, and revises its comment.

Discussion: https://postgr.es/m/db0fc3a4-966c-4cec-a136-94024d39212d%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 18 +++++++++---------
 src/backend/optimizer/plan/planner.c  |  8 ++++----
 src/include/nodes/pathnodes.h         | 13 ++++++++++---
 src/tools/pgindent/typedefs.list      |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1ed79797329..c98486e292c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -450,7 +450,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -459,7 +459,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -471,7 +471,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -486,13 +486,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
-	PathKeyInfo *info;
+	GroupByOrdering *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -528,7 +528,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -553,7 +553,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -563,7 +563,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell   *lc;
 
 		/* Test consistency of info structures */
@@ -572,7 +572,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			ListCell   *lc1,
 					   *lc2;
 
-			info = lfirst_node(PathKeyInfo, lc);
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 669903b9059..1eeedc121ed 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6917,7 +6917,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6998,7 +6998,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7249,7 +7249,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7305,7 +7305,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..78489398294 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1468,14 +1468,21 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and the corresponding list of
+ * pathkeys.
+ *
+ * The elements of 'clauses' list should have the same order as the head of
+ * 'pathkeys' list.  The tleSortGroupRef of the clause should be equal to
+ * ec_sortref of the pathkey equivalence class.  If there are redundant
+ * clauses with the same tleSortGroupRef, they must be grouped together.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d427a1c16a5..4f57078d133 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1056,6 +1056,7 @@ GrantRoleStmt
 GrantStmt
 GrantTargetType
 Group
+GroupByOrdering
 GroupClause
 GroupPath
 GroupPathExtraData
@@ -2067,7 +2068,6 @@ PathClauseUsage
 PathCostComparison
 PathHashStack
 PathKey
-PathKeyInfo
 PathKeysComparison
 PathTarget
 PatternInfo
-- 
2.39.3 (Apple Git-145)

Attachment: v4-0004-Restore-preprocess_groupclause.patch (application/octet-stream)
From 087791dc5de8626337b9a7ab6099c8c7794a763c Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:51:31 +0300
Subject: [PATCH v4 4/5] Restore preprocess_groupclause()

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
It eliminated preprocess_groupclause(), which was intended to match items
between GROUP BY and ORDER BY.  Instead, get_useful_group_keys_orderings()
function generates orderings of GROUP BY elements at the time of grouping
paths generation.  The get_useful_group_keys_orderings() function takes into
account 3 orderings of GROUP BY pathkeys and clauses: original order as written
in GROUP BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible.  Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY clauses
we don't need to consider it apart from ordering matching ORDER BY clauses.

This commit restores preprocess_groupclause() to provide an ordering of
GROUP BY elements matching ORDER BY before generation of paths.  The new
version of preprocess_groupclause() takes into account an incremental sort.
The get_useful_group_keys_orderings() function now takes into account 2 orderings
of GROUP BY elements: the order generated by preprocess_groupclause() and the order
matching the input path as much as possible.

Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com
Author: Alexander Korotkov
---
 src/backend/optimizer/path/pathkeys.c         |  55 +--------
 src/backend/optimizer/plan/planner.c          | 108 +++++++++++++++---
 src/include/nodes/pathnodes.h                 |   6 +-
 .../regress/expected/partition_aggregate.out  |   6 +-
 4 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index c98486e292c..793c8ad2930 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * pathkeys_are_duplicate
- *		Check if give pathkeys are already contained the list of
- *		GroupByOrdering's.
- */
-static bool
-pathkeys_are_duplicate(List *infos, List *pathkeys)
-{
-	ListCell   *lc;
-
-	foreach(lc, infos)
-	{
-		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
-
-		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
-			return true;
-	}
-	return false;
-}
-
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
- * The function considers (and keeps) multiple GROUP BY orderings:
+ * The function considers (and keeps) following GROUP BY orderings:
  *
- * - the original ordering, as specified by the GROUP BY clause,
- * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys as ordered by preprocess_groupclause() to match target
+ *   ORDER BY clause (as much as possible),
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
-		{
-			info = makeNode(GroupByOrdering);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
-			infos = lappend(infos, info);
-		}
-	}
-
-	/*
-	 * Try reordering pathkeys to minimize the sort cost (this time consider
-	 * the ORDER BY clause).
-	 */
-	if (root->sort_pathkeys &&
-		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
-	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
-										   &clauses,
-										   root->num_groupby_pathkeys);
-
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1eeedc121ed..d6a1fb8e7d5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = list_copy(parse->groupClause);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * groupclause_apply_groupingset
- *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
- *		not in the grouping set are skipped.
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ * We also consider a partial match between GROUP BY and ORDER BY elements,
+ * which could allow ORDER BY to be implemented using an incremental sort.
+ *
+ * We also consider other orderings of the GROUP BY elements, which could
+ * match the sort ordering of other possible plans (eg an indexscan) and
+ * thereby reduce cost.  This is implemented during the generation of grouping
+ * paths.  See get_useful_group_keys_orderings() for details.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
+ *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
  */
 static List *
-groupclause_apply_groupingset(PlannerInfo *root, List *gset)
+preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
 	ListCell   *sl;
+	ListCell   *gl;
 
-	foreach(sl, gset)
+	/* For grouping sets, we need to force the ordering */
+	if (force)
 	{
-		Index		ref = lfirst_int(sl);
-		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		foreach(sl, force)
+		{
+			Index		ref = lfirst_int(sl);
+			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+			new_groupclause = lappend(new_groupclause, cl);
+		}
 
-		new_groupclause = lappend(new_groupclause, cl);
+		return new_groupclause;
 	}
+
+	/* If no ORDER BY, nothing useful to do here */
+	if (parse->sortClause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Add any remaining GROUP BY items to the new list.  We don't require a
+	 * complete match, because even partial match allows ORDER BY to be
+	 * implemented using incremental sort.  Also, give up if there are any
+	 * non-sortable GROUP BY items, since then there's no hope anyway.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = groupclause_apply_groupingset(root, gset);
+				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 78489398294..2ba297c1172 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,11 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.
+	 * nonempty processed_groupClause.  The optimizer chooses a specific
+	 * order of group-by clauses during generation of the upper paths,
+	 * attempting different strategies to minimize the number of sorts or to
+	 * engage incremental sort.  See preprocess_groupclause() and
+	 * get_useful_group_keys_orderings() for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8e..5f2c0cf5786 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
    Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
    ->  Append
          ->  HashAggregate
-               Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
+               Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
                Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p1 pagg_tab_m
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
+               Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
                Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
+               Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
                Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
 (15 rows)
-- 
2.39.3 (Apple Git-145)

v4-0005-Teach-group_keys_reorder_by_pathkeys-about-redund.patch (application/octet-stream)
From 0e33f7c66682401180e037ec0fd8564401241c0a Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:13:11 +0300
Subject: [PATCH v4 5/5] Teach group_keys_reorder_by_pathkeys() about redundant
 SortGroupClause's

We don't have strict guarantees that there shouldn't be redundant
SortGroupClause's.  Although there is no confirmed way to generate
duplicate SortGroupClause's, this commit teaches
group_keys_reorder_by_pathkeys() to pull out all the SortGroupClauses matching
a pathkey at once.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 41 ++++++++++++++++++---------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 793c8ad2930..0b7e9cd48db 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -401,7 +401,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	foreach(lc, pathkeys)
 	{
 		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		bool		found = false;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -416,28 +416,40 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		/*
 		 * Since 1349d27 pathkey coming from underlying node can be in the
 		 * root->group_pathkeys but not in the processed_groupClause. So, we
-		 * should be careful here.
+		 * should be careful here.  Also, there could be redundant
+		 * SortGroupClause's.  So, we need to pull out all the matching
+		 * SortGroupClause's, but ensure there is at least one.
 		 */
-		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
-											*group_clauses);
-		if (!sgc)
-			/* The grouping clause does not cover this pathkey */
-			break;
+		foreach_ptr(SortGroupClause, sgc, *group_clauses)
+		{
+			if (sgc->tleSortGroupRef == pathkey->pk_eclass->ec_sortref)
+			{
+				/*
+				 * Sort group clause should have an ordering operator as long
+				 * as there is an associated pathkey.
+				 */
+				Assert(OidIsValid(sgc->sortop));
 
-		/*
-		 * Sort group clause should have an ordering operator as long as there
-		 * is an associated pathkey.
-		 */
-		Assert(OidIsValid(sgc->sortop));
+				new_group_clauses = lappend(new_group_clauses, sgc);
+				found = true;
+			}
+		}
+
+		/* Check if the grouping clause does not cover this pathkey */
+		if (!found)
+			break;
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-		new_group_clauses = lappend(new_group_clauses, sgc);
+
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
@@ -544,6 +556,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 #endif
+
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)

v4-0001-Fix-asymmetry-in-setting-EquivalenceClass.ec_sort.patch (application/octet-stream)
From eef9b88438eb4c3cc40d98f4367a5c6b4ec4a920 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:10:34 +0300
Subject: [PATCH v4 1/5] Fix asymmetry in setting EquivalenceClass.ec_sortref

0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the EquivalenceClass.ec_sortref
values needed to explore alternative GROUP BY orderings specifically while
building pathkeys from the list of grouping clauses.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/equivclass.c  | 13 +------
 src/backend/optimizer/path/pathkeys.c    | 18 ++++++++-
 src/backend/optimizer/plan/planner.c     | 16 ++++++--
 src/include/optimizer/paths.h            |  3 +-
 src/test/regress/expected/aggregates.out | 47 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 14 +++++++
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..51d806326eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec;	/* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..837e612121f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true then sets the uninitialized value of ec_sortref
+ * of the pathkey's EquivalenceClass.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+		{
+			/*
+			 * Copy the sortref if it wasn't set yet.  That may happen if the
+			 * EquivalenceClass was constructed from a WHERE clause, i.e. it
+			 * doesn't have a target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+		}
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..669903b9059 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		 */
 		bool		sortable;
 
+		/*
+		 * Covert group clauses into pathkeys.  Set the ec_sortref field of
+		 * EquivalenceClass'es if it's not set yet.
+		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses_extended(root,
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5e88c0224a4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2442342e9d1..1c1ca7573ad 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 61a3424c845..1a18ca3d8fe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
-- 
2.39.3 (Apple Git-145)

v4-0002-Add-invariants-check-to-get_useful_group_keys_ord.patch (application/octet-stream)
From 61cd7d2c4439a5971a9f00930b08011f7e881814 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 30 May 2024 22:38:53 +0300
Subject: [PATCH v4 2/5] Add invariants check to
 get_useful_group_keys_orderings()

This commit introduces invariant checks on the generated orderings
in get_useful_group_keys_orderings() for assert-enabled builds.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 837e612121f..1ed79797329 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -561,6 +561,34 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			ListCell   *lc1,
+					   *lc2;
+
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey    *pk = lfirst_node(PathKey, lc2);
+
+				Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef);
+			}
+		}
+	}
+#endif
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)

#170 Pavel Borisov
pashkin.elfe@gmail.com
In reply to: Alexander Korotkov (#169)
Re: POC: GROUP BY optimization

Hi, Alexander!

On Sun, 2 Jun 2024 at 17:18, Alexander Korotkov <aekorotkov@gmail.com> wrote:

Hi!

On Sun, Jun 2, 2024 at 10:55 AM jian he <jian.universality@gmail.com> wrote:

On Fri, May 31, 2024 at 8:12 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

I've revised some grammar including the sentence you've proposed.

-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);

changing preprocess_groupclause the second argument
from "force" to "gset" would be more intuitive, I think.

Probably, but my intention is to restore preprocess_groupclause() as
it was before 0452b461bc with minimal edits to support incremental
sort. I'd rather avoid refactoring of this area for now.

`elog(ERROR, "Order of group-by clauses doesn't correspond incoming
sort order");`

I think this error message makes people wonder what "incoming sort order" is.

BTW, "correspond", generally people use "correspond to".

Thank you. On the second thought, I think it would be better to turn
this into an assertion like the checks before.

I did some minor cosmetic changes, mainly changing foreach to

foreach_node.

Please check the attachment.

I would avoid refactoring of preprocess_groupclause() for the reason
described above. But I picked the grammar fix for PlannerInfo's
comment.

Thank you for working on this patchset.

0001: Patch looks right

Comments:

Covert -> Convert
sets the uninitialized value of ec_sortref of the pathkey's
EquivalenceClass -> sets the value of the pathkey's EquivalenceClass unless
it's already initialized
wasn't set yet -> hasn't been set yet

0002: additional assert checking only. Looks right.

0003: pure renaming, looks good.

0004: Restores the pre-0452b461bc state of preprocess_groupclause with the
partial_match fallback removed. Looks right. I haven't checked the comments,
assuming they are restored from the pre-0452b461bc state.

0005: Looks right.

Regards,
Pavel Borisov
Supabase

#171 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Pavel Borisov (#170)
5 attachment(s)
Re: POC: GROUP BY optimization

On Mon, Jun 3, 2024 at 6:55 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

On Sun, 2 Jun 2024 at 17:18, Alexander Korotkov <aekorotkov@gmail.com> wrote:

Hi!

On Sun, Jun 2, 2024 at 10:55 AM jian he <jian.universality@gmail.com> wrote:

On Fri, May 31, 2024 at 8:12 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

I've revised some grammar including the sentence you've proposed.

-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);

changing preprocess_groupclause the second argument
from "force" to "gset" would be more intuitive, I think.

Probably, but my intention is to restore preprocess_groupclause() as
it was before 0452b461bc with minimal edits to support incremental
sort. I'd rather avoid refactoring of this area for now.

`elog(ERROR, "Order of group-by clauses doesn't correspond incoming
sort order");`

I think this error message makes people wonder what "incoming sort order" is.
BTW, "correspond", generally people use "correspond to".

Thank you. On the second thought, I think it would be better to turn
this into an assertion like the checks before.

I did some minor cosmetic changes, mainly changing foreach to foreach_node.
Please check the attachment.

I would avoid refactoring of preprocess_groupclause() for the reason
described above. But I picked the grammar fix for PlannerInfo's
comment.

Thank you for working on this patchset.

0001: Patch looks right

Comments:

Covert -> Convert
sets the uninitialized value of ec_sortref of the pathkey's EquivalenceClass -> sets the value of the pathkey's EquivalenceClass unless it's already initialized
wasn't set yet -> hasn't been set yet

0002: additional assert checking only. Looks right.

0003: pure renaming, looks good.

0004: Restores pre 0452b461bc state to preprocess_groupclause with removed partial_match fallback. Looks right. I haven't checked the comments provided they are restored from pre 0452b461bc state.

0005: Looks right.

Thank you. Revised according to your comments. I think 0001-0004
comprise a good refactoring addressing the concerns from Tom [1].
0001 Removes from get_eclass_for_sort_expr() the unnatural responsibility
of setting EquivalenceClass.ec_sortref. Instead, this field is set in
make_pathkeys_for_sortclauses_extended() while processing the group
clauses.
0002 Provides additional asserts. Those should help defend against
lurking bugs.
0003 Fixes an unsuitable name of a data structure.
0004 Restores the pre-0452b461bc state of preprocess_groupclause() and
lowers the number of paths to consider.
It would be good to hear explicitly from Tom about this. But I'm quite
sure these patches make the situation better, not worse. I'm going to
push them if there are no objections.

I'm putting 0005 aside. It's unclear why and how there could be
duplicate SortGroupClauses at that stage. Also, it's unclear why the
existing code handles them badly. So, this should wait for a comment
from Tom.

Links.
1. /messages/by-id/266850.1712879082@sss.pgh.pa.us

------
Regards,
Alexander Korotkov
Supabase

Attachments:

v5-0001-Fix-asymmetry-in-setting-EquivalenceClass.ec_sort.patch (application/octet-stream)
From eef9b88438eb4c3cc40d98f4367a5c6b4ec4a920 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:10:34 +0300
Subject: [PATCH v5 1/5] Fix asymmetry in setting EquivalenceClass.ec_sortref

0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the EquivalenceClass.ec_sortref
values needed to explore alternative GROUP BY orderings specifically while
building pathkeys from the list of grouping clauses.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/equivclass.c  | 13 +------
 src/backend/optimizer/path/pathkeys.c    | 18 ++++++++-
 src/backend/optimizer/plan/planner.c     | 16 ++++++--
 src/include/optimizer/paths.h            |  3 +-
 src/test/regress/expected/aggregates.out | 47 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 14 +++++++
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..51d806326eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec;	/* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..837e612121f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true then sets the uninitialized value of ec_sortref
+ * of the pathkey's EquivalenceClass.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+		{
+			/*
+			 * Copy the sortref if it wasn't set yet.  That may happen if the
+			 * EquivalenceClass was constructed from a WHERE clause, i.e. it
+			 * doesn't have a target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+		}
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..669903b9059 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		 */
 		bool		sortable;
 
+		/*
+		 * Covert group clauses into pathkeys.  Set the ec_sortref field of
+		 * EquivalenceClass'es if it's not set yet.
+		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses_extended(root,
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5e88c0224a4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2442342e9d1..1c1ca7573ad 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 61a3424c845..1a18ca3d8fe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
-- 
2.39.3 (Apple Git-145)

Attachment: v5-0002-Add-invariants-check-to-get_useful_group_keys_ord.patch (application/octet-stream)
From 61cd7d2c4439a5971a9f00930b08011f7e881814 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 30 May 2024 22:38:53 +0300
Subject: [PATCH v5 2/5] Add invariants check to
 get_useful_group_keys_orderings()

This commit introduces invariant checking of the generated orderings
in get_useful_group_keys_orderings() for assert-enabled builds.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 837e612121f..1ed79797329 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -561,6 +561,34 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			ListCell   *lc1,
+					   *lc2;
+
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey    *pk = lfirst_node(PathKey, lc2);
+
+				Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef);
+			}
+		}
+	}
+#endif
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)
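To make the intent of the new assert block concrete, here is a standalone sketch of the invariant it enforces. This is an illustrative Python model, not PostgreSQL code: clauses and pathkeys are reduced to integer sort refs, and "alignment" means the refs are equal.

```python
def check_orderings(orderings):
    """Model of the USE_ASSERT_CHECKING block in
    get_useful_group_keys_orderings(): every alternative ordering must
    contain the same clauses and pathkeys as the first one (only the
    order may differ), and each clause must stay paired with the
    pathkey carrying the same sort ref."""
    first = orderings[0]
    for info in orderings[1:]:
        assert len(info["clauses"]) == len(first["clauses"])
        assert len(info["pathkeys"]) == len(first["pathkeys"])
        assert set(info["clauses"]) == set(first["clauses"])
        assert set(info["pathkeys"]) == set(first["pathkeys"])
        # pairwise alignment: clause tleSortGroupRef == pathkey ec_sortref
        for sgc_ref, pk_ref in zip(info["clauses"], info["pathkeys"]):
            assert sgc_ref == pk_ref
    return True
```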

Attachment: v5-0004-Restore-preprocess_groupclause.patch (application/octet-stream)
From 087791dc5de8626337b9a7ab6099c8c7794a763c Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:51:31 +0300
Subject: [PATCH v5 4/5] Restore preprocess_groupclause()

0452b461bc made the optimizer explore alternative orderings of group-by pathkeys.
It eliminated preprocess_groupclause(), which was intended to match items
between GROUP BY and ORDER BY.  Instead, get_useful_group_keys_orderings()
function generates orderings of GROUP BY elements at the time of grouping
paths generation.  The get_useful_group_keys_orderings() function takes into
account 3 orderings of GROUP BY pathkeys and clauses: original order as written
in GROUP BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible.  Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY clauses,
we don't need to consider it apart from ordering matching ORDER BY clauses.

This commit restores preprocess_groupclause() to provide an ordering of
GROUP BY elements matching ORDER BY before generation of paths.  The new
version of preprocess_groupclause() takes into account an incremental sort.
The get_useful_group_keys_orderings() function now takes into account 2 orderings
of GROUP BY elements: the order generated by preprocess_groupclause() and the order
matching the input path as much as possible.

Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com
Author: Alexander Korotkov
---
 src/backend/optimizer/path/pathkeys.c         |  55 +--------
 src/backend/optimizer/plan/planner.c          | 108 +++++++++++++++---
 src/include/nodes/pathnodes.h                 |   6 +-
 .../regress/expected/partition_aggregate.out  |   6 +-
 4 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index c98486e292c..793c8ad2930 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * pathkeys_are_duplicate
- *		Check if give pathkeys are already contained the list of
- *		GroupByOrdering's.
- */
-static bool
-pathkeys_are_duplicate(List *infos, List *pathkeys)
-{
-	ListCell   *lc;
-
-	foreach(lc, infos)
-	{
-		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
-
-		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
-			return true;
-	}
-	return false;
-}
-
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
- * The function considers (and keeps) multiple GROUP BY orderings:
+ * The function considers (and keeps) the following GROUP BY orderings:
  *
- * - the original ordering, as specified by the GROUP BY clause,
- * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys as ordered by preprocess_groupclause() to match target
+ *   ORDER BY clause (as much as possible),
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
-		{
-			info = makeNode(GroupByOrdering);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
-			infos = lappend(infos, info);
-		}
-	}
-
-	/*
-	 * Try reordering pathkeys to minimize the sort cost (this time consider
-	 * the ORDER BY clause).
-	 */
-	if (root->sort_pathkeys &&
-		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
-	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
-										   &clauses,
-										   root->num_groupby_pathkeys);
-
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1eeedc121ed..d6a1fb8e7d5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = list_copy(parse->groupClause);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * groupclause_apply_groupingset
- *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
- *		not in the grouping set are skipped.
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ * We also consider partial match between GROUP BY and ORDER BY elements,
+ * which could allow to implement ORDER BY using the incremental sort.
+ *
+ * We also consider other orderings of the GROUP BY elements, which could
+ * match the sort ordering of other possible plans (eg an indexscan) and
+ * thereby reduce cost.  This is implemented during the generation of grouping
+ * paths.  See get_useful_group_keys_orderings() for details.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
+ *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
  */
 static List *
-groupclause_apply_groupingset(PlannerInfo *root, List *gset)
+preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
 	ListCell   *sl;
+	ListCell   *gl;
 
-	foreach(sl, gset)
+	/* For grouping sets, we need to force the ordering */
+	if (force)
 	{
-		Index		ref = lfirst_int(sl);
-		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		foreach(sl, force)
+		{
+			Index		ref = lfirst_int(sl);
+			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+			new_groupclause = lappend(new_groupclause, cl);
+		}
 
-		new_groupclause = lappend(new_groupclause, cl);
+		return new_groupclause;
 	}
+
+	/* If no ORDER BY, nothing useful to do here */
+	if (parse->sortClause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Add any remaining GROUP BY items to the new list.  We don't require a
+	 * complete match, because even partial match allows ORDER BY to be
+	 * implemented using incremental sort.  Also, give up if there are any
+	 * non-sortable GROUP BY items, since then there's no hope anyway.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = groupclause_apply_groupingset(root, gset);
+				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 78489398294..2ba297c1172 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,11 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.
+	 * nonempty processed_groupClause.  Optimizer chooses specific order of
+	 * group-by clauses during the upper paths generation process, attempting
+	 * to use different strategies to minimize number of sorts or engage
+	 * incremental sort.  See preprocess_groupclause() and
+	 * get_useful_group_keys_orderings() for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8e..5f2c0cf5786 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
    Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
    ->  Append
          ->  HashAggregate
-               Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
+               Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
                Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p1 pagg_tab_m
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
+               Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
                Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
+               Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
                Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
 (15 rows)
-- 
2.39.3 (Apple Git-145)
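A rough sketch of the restored ordering logic, as an illustrative Python model rather than the C implementation: GROUP BY items are reordered so that a prefix matches ORDER BY, which lets a single (possibly incremental) sort serve both clauses. Clauses are modeled as plain strings, equality stands in for the equal() node comparison, and the grouping-sets and non-sortable-item branches are omitted.

```python
def preprocess_groupclause_model(group_clause, sort_clause):
    """Reorder GROUP BY items so a prefix matches ORDER BY.
    Returns a new list; the original is left untouched."""
    new = []
    for sc in sort_clause:
        gc = next((g for g in group_clause if g == sc), None)
        if gc is None:
            break               # only a matching prefix is useful
        new.append(gc)
    if not new:
        return list(group_clause)   # no match at all: keep original order
    # Append the remaining GROUP BY items; even a partial match lets
    # ORDER BY be implemented with an incremental sort.
    new.extend(g for g in group_clause if g not in new)
    return new
```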

Attachment: v5-0003-Rename-PathKeyInfo-to-GroupByOrdering.patch (application/octet-stream)
From 74734378e209cf37124cb12b7d80717d49d56d42 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 27 May 2024 14:57:43 +0300
Subject: [PATCH v5 3/5] Rename PathKeyInfo to GroupByOrdering

0452b461bc made the optimizer explore alternative orderings of group-by pathkeys.
The PathKeyInfo data structure was used to store the particular ordering of
group-by pathkeys and corresponding clauses.  It turns out that PathKeyInfo
is not the best name for that purpose.  This commit renames this data structure
to GroupByOrdering, and revises its comment.

Discussion: https://postgr.es/m/db0fc3a4-966c-4cec-a136-94024d39212d%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 18 +++++++++---------
 src/backend/optimizer/plan/planner.c  |  8 ++++----
 src/include/nodes/pathnodes.h         | 13 ++++++++++---
 src/tools/pgindent/typedefs.list      |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1ed79797329..c98486e292c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -450,7 +450,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -459,7 +459,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -471,7 +471,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -486,13 +486,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
-	PathKeyInfo *info;
+	GroupByOrdering *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -528,7 +528,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -553,7 +553,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -563,7 +563,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell   *lc;
 
 		/* Test consistency of info structures */
@@ -572,7 +572,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			ListCell   *lc1,
 					   *lc2;
 
-			info = lfirst_node(PathKeyInfo, lc);
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 669903b9059..1eeedc121ed 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6917,7 +6917,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6998,7 +6998,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7249,7 +7249,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7305,7 +7305,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..78489398294 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1468,14 +1468,21 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and the corresponding list of
+ * pathkeys.
+ *
+ * The elements of 'clauses' list should have the same order as the head of
+ * 'pathkeys' list.  The tleSortGroupRef of the clause should be equal to
+ * ec_sortref of the pathkey equivalence class.  If there are redundant
+ * clauses with the same tleSortGroupRef, they must be grouped together.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d427a1c16a5..4f57078d133 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1056,6 +1056,7 @@ GrantRoleStmt
 GrantStmt
 GrantTargetType
 Group
+GroupByOrdering
 GroupClause
 GroupPath
 GroupPathExtraData
@@ -2067,7 +2068,6 @@ PathClauseUsage
 PathCostComparison
 PathHashStack
 PathKey
-PathKeyInfo
 PathKeysComparison
 PathTarget
 PatternInfo
-- 
2.39.3 (Apple Git-145)

Attachment: v5-0005-Teach-group_keys_reorder_by_pathkeys-about-redund.patch (application/octet-stream)
From 0e33f7c66682401180e037ec0fd8564401241c0a Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:13:11 +0300
Subject: [PATCH v5 5/5] Teach group_keys_reorder_by_pathkeys() about redundant
 SortGroupClause's

We don't have strict guarantees that there shouldn't be redundant
SortGroupClause's.  Although there is no confirmed way to generate
duplicate SortGroupClause's, this commit teaches
group_keys_reorder_by_pathkeys() to pull all the SortGroupClauses matching a
pathkey at once.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Author: Andrei Lepikhov
---
 src/backend/optimizer/path/pathkeys.c | 41 ++++++++++++++++++---------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 793c8ad2930..0b7e9cd48db 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -401,7 +401,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	foreach(lc, pathkeys)
 	{
 		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		bool		found = false;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -416,28 +416,40 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		/*
 		 * Since 1349d27 pathkey coming from underlying node can be in the
 		 * root->group_pathkeys but not in the processed_groupClause. So, we
-		 * should be careful here.
+		 * should be careful here.  Also, there could be redundant
+		 * SortGroupClause's.  So, we need to pull of all the matching
+		 * SortGroupClause's.  So, we need to pull all the matching
 		 */
-		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
-											*group_clauses);
-		if (!sgc)
-			/* The grouping clause does not cover this pathkey */
-			break;
+		foreach_ptr(SortGroupClause, sgc, *group_clauses)
+		{
+			if (sgc->tleSortGroupRef == pathkey->pk_eclass->ec_sortref)
+			{
+				/*
+				 * Sort group clause should have an ordering operator as long
+				 * as there is an associated pathkey.
+				 */
+				Assert(OidIsValid(sgc->sortop));
 
-		/*
-		 * Sort group clause should have an ordering operator as long as there
-		 * is an associated pathkey.
-		 */
-		Assert(OidIsValid(sgc->sortop));
+				new_group_clauses = lappend(new_group_clauses, sgc);
+				found = true;
+			}
+		}
+
+		/* Check if the grouping clause does not cover this pathkey */
+		if (!found)
+			break;
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-		new_group_clauses = lappend(new_group_clauses, sgc);
+
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
@@ -544,6 +556,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 #endif
+
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)
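The clause-pulling loop above can be sketched standalone. This is an illustrative Python model, not PostgreSQL code: pathkeys and clauses are reduced to integer sort refs, so duplicate refs stand in for redundant SortGroupClauses.

```python
def reorder_group_clauses(pathkey_refs, clause_refs):
    """Model of group_keys_reorder_by_pathkeys(): walk the input-path
    pathkeys and, for each one, pull *all* matching group clauses
    (there may be redundant duplicates).  Returns (n, clauses) where
    n is the number of pathkeys covered by the grouping clauses."""
    new_clauses = []
    n = 0
    for pk in pathkey_refs:
        matched = [c for c in clause_refs if c == pk]
        if not matched:
            break               # grouping clauses don't cover this pathkey
        new_clauses.extend(matched)
        n += 1
    # Append the remaining clauses (treated as not sorted).
    covered = set(new_clauses)
    new_clauses.extend(c for c in clause_refs if c not in covered)
    return n, new_clauses
```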

#172 Alexander Korotkov
aekorotkov@gmail.com
In reply to: Alexander Korotkov (#171)
5 attachment(s)
Re: POC: GROUP BY optimization

On Tue, Jun 4, 2024 at 1:47 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

On Mon, Jun 3, 2024 at 6:55 PM Pavel Borisov <pashkin.elfe@gmail.com> wrote:

On Sun, 2 Jun 2024 at 17:18, Alexander Korotkov <aekorotkov@gmail.com> wrote:

On Sun, Jun 2, 2024 at 10:55 AM jian he <jian.universality@gmail.com> wrote:

On Fri, May 31, 2024 at 8:12 AM Alexander Korotkov <aekorotkov@gmail.com> wrote:

I've revised some grammar including the sentence you've proposed.

-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);

changing preprocess_groupclause the second argument
from "force" to "gset" would be more intuitive, I think.

Probably, but my intention is to restore preprocess_groupclause() as
it was before 0452b461bc with minimal edits to support incremental
sort. I'd rather avoid refactoring of this area for now.

`elog(ERROR, "Order of group-by clauses doesn't correspond incoming
sort order");`

I think this error message makes people wonder what "incoming sort order" is.
BTW, "correspond", generally people use "correspond to".

Thank you. On second thought, I think it would be better to turn
this into an assertion like the checks before.

I made some minor cosmetic changes, mainly changing foreach to foreach_node.
Please check the attachment.

I would avoid refactoring of preprocess_groupclause() for the reason
described above. But I picked the grammar fix for PlannerInfo's
comment.

Thank you for working on this patchset.

0001: Patch looks right

Comments:

Covert -> Convert
sets the uninitialized value of ec_sortref of the pathkey's EquivalenceClass -> sets the value of the pathkey's EquivalenceClass unless it's already initialized
wasn't set yet -> hasn't been set yet

0002: additional assert checking only. Looks right.

0003: pure renaming, looks good.

0004: Restores the pre-0452b461bc state of preprocess_groupclause with the partial_match fallback removed. Looks right. I haven't checked the comments, assuming they are restored from the pre-0452b461bc state.

0005: Looks right.

Thank you. Revised according to your comments. I think 0001-0004
comprise a good refactoring addressing concerns from Tom [1].
0001 Removes from get_eclass_for_sort_expr() the unnatural responsibility
of setting EquivalenceClass.ec_sortref. Instead, this field is set in
make_pathkeys_for_sortclauses_extended() while processing group
clauses.
0002 Provides additional asserts. That should help defend
against lurking bugs.
0003 Fixes the unsuitable name of a data structure.
0004 Restores the pre-0452b461bc state of preprocess_groupclause() and
lowers the number of paths to consider.
It would be good to hear explicitly from Tom about this. But I'm quite
sure these patches make the situation better, not worse. I'm going to
push them if there are no objections.
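To make the 0004 idea concrete, here is a rough, self-contained C sketch of the prefix-matching reordering behind the restored preprocess_groupclause(). It is a toy model, not the planner code: GROUP BY and ORDER BY items are plain ints standing in for SortGroupClause references, and the real function additionally bails out on non-sortable items and operates on List nodes.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * Reorder GROUP BY items so that the longest possible prefix matches the
 * ORDER BY list, then append the remaining items in their original order.
 *
 * Returns the length of the ORDER-BY-matching prefix; 'out' receives all
 * n_group items in the new order.  Items are assumed distinct, as sort
 * group references are.
 */
static size_t
reorder_group_by(const int *order_by, size_t n_order,
                 const int *group_by, size_t n_group,
                 int *out)
{
    size_t      n_out = 0;
    size_t      prefix_len;

    /* Scan ORDER BY and pull matching GROUP BY items into a prefix. */
    for (size_t i = 0; i < n_order && n_out < n_group; i++)
    {
        bool        matched = false;

        for (size_t j = 0; j < n_group; j++)
        {
            if (group_by[j] == order_by[i])
            {
                out[n_out++] = group_by[j];
                matched = true;
                break;
            }
        }
        if (!matched)
            break;              /* the matching prefix ends here */
    }
    prefix_len = n_out;

    /* Append the GROUP BY items not already placed, keeping their order. */
    for (size_t j = 0; j < n_group; j++)
    {
        bool        placed = false;

        for (size_t k = 0; k < prefix_len; k++)
            if (out[k] == group_by[j])
                placed = true;
        if (!placed)
            out[n_out++] = group_by[j];
    }
    return prefix_len;
}
```

With GROUP BY items (1, 2, 3) and ORDER BY items (3, 1) this yields (3, 1, 2) with a prefix of length 2, so a single sort (or an incremental sort on the suffix) can serve both clauses. The function name above is invented for illustration.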

I'm putting 0005 aside. It's unclear why and how there could be
duplicate SortGroupClauses at that stage. Also, it's unclear why the
existing code handles them badly. So, this should wait for a comment from
Tom.

Links.
1. /messages/by-id/266850.1712879082@sss.pgh.pa.us

Oops... I found I've published an old version of the patchset. The
relevant version is attached.

------
Regards,
Alexander Korotkov
Supabase

Attachments:

v6-0002-Add-invariants-check-to-get_useful_group_keys_ord.patch (application/octet-stream)
From ad53192551dc043b2c43ff09c2097173fa6a5a9b Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 30 May 2024 22:38:53 +0300
Subject: [PATCH v6 2/5] Add invariants check to
 get_useful_group_keys_orderings()

This commit introduces invariants checking of generated orderings
in get_useful_group_keys_orderings for assert-enabled builds.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Reported-by: Tom Lane
Author: Andrei Lepikhov
Reviewed-by: Alexander Korotkov, Pavel Borisov
---
 src/backend/optimizer/path/pathkeys.c | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index df966b18f34..02e46a6eaaf 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -561,6 +561,34 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			ListCell   *lc1,
+					   *lc2;
+
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey    *pk = lfirst_node(PathKey, lc2);
+
+				Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef);
+			}
+		}
+	}
+#endif
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)

v6-0004-Restore-preprocess_groupclause.patch (application/octet-stream)
From 4d912e16a9809bee89b8f375f979c297a387094c Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:51:31 +0300
Subject: [PATCH v6 4/5] Restore preprocess_groupclause()

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
It eliminated preprocess_groupclause(), which was intended to match items
between GROUP BY and ORDER BY.  Instead, get_useful_group_keys_orderings()
function generates orderings of GROUP BY elements at the time of grouping
paths generation.  The get_useful_group_keys_orderings() function takes into
account 3 orderings of GROUP BY pathkeys and clauses: original order as written
in GROUP BY, matching ORDER BY clauses as much as possible, and matching the
input path as much as possible.  Given that even before 0452b461b,
preprocess_groupclause() could change the original order of GROUP BY clauses,
we don't need to consider it apart from the ordering matching ORDER BY clauses.

This commit restores preprocess_groupclause() to provide an ordering of
GROUP BY elements matching ORDER BY before generation of paths.  The new
version of preprocess_groupclause() takes into account an incremental sort.
The get_useful_group_keys_orderings() function now takes into account 2
orderings of GROUP BY elements: the order generated by preprocess_groupclause()
and the order matching the input path as much as possible.

Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Andrei Lepikhov, Pavel Borisov
---
 src/backend/optimizer/path/pathkeys.c         |  55 +--------
 src/backend/optimizer/plan/planner.c          | 108 +++++++++++++++---
 src/include/nodes/pathnodes.h                 |   6 +-
 .../regress/expected/partition_aggregate.out  |   6 +-
 4 files changed, 108 insertions(+), 67 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 0a25b6384bb..416fc4e240b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	return n;
 }
 
-/*
- * pathkeys_are_duplicate
- *		Check if give pathkeys are already contained the list of
- *		GroupByOrdering's.
- */
-static bool
-pathkeys_are_duplicate(List *infos, List *pathkeys)
-{
-	ListCell   *lc;
-
-	foreach(lc, infos)
-	{
-		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
-
-		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
-			return true;
-	}
-	return false;
-}
-
 /*
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
- * The function considers (and keeps) multiple GROUP BY orderings:
+ * The function considers (and keeps) following GROUP BY orderings:
  *
- * - the original ordering, as specified by the GROUP BY clause,
- * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
- * - GROUP BY keys to match target ORDER BY clause (as much as possible).
+ * - GROUP BY keys as ordered by preprocess_groupclause() to match target
+ *   ORDER BY clause (as much as possible),
+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
  */
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 		if (n > 0 &&
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
-		{
-			info = makeNode(GroupByOrdering);
-			info->pathkeys = pathkeys;
-			info->clauses = clauses;
-
-			infos = lappend(infos, info);
-		}
-	}
-
-	/*
-	 * Try reordering pathkeys to minimize the sort cost (this time consider
-	 * the ORDER BY clause).
-	 */
-	if (root->sort_pathkeys &&
-		!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
-	{
-		int			n;
-
-		n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
-										   &clauses,
-										   root->num_groupby_pathkeys);
-
-		if (n > 0 &&
-			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
-			!pathkeys_are_duplicate(infos, pathkeys))
+			compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
 		{
 			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 084c6796d03..4711f912390 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
 							   double tuple_fraction,
 							   int64 *offset_est, int64 *count_est);
 static void remove_useless_groupby_columns(PlannerInfo *root);
-static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
+static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			root->processed_groupClause = list_copy(parse->groupClause);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
 			/* Remove any redundant GROUP BY columns */
 			remove_useless_groupby_columns(root);
 		}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		 * The groupClauses for hashed grouping sets are built later on.)
 		 */
 		if (gs->set)
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 		else
 			rollup->groupClause = NIL;
 
@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
 }
 
 /*
- * groupclause_apply_groupingset
- *		Apply the order of GROUP BY clauses defined by grouping sets.  Items
- *		not in the grouping set are skipped.
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ * We also consider partial match between GROUP BY and ORDER BY elements,
+ * which could allow to implement ORDER BY using the incremental sort.
+ *
+ * We also consider other orderings of the GROUP BY elements, which could
+ * match the sort ordering of other possible plans (eg an indexscan) and
+ * thereby reduce cost.  This is implemented during the generation of grouping
+ * paths.  See get_useful_group_keys_orderings() for details.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
+ *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
+ * For grouping sets, the order of items is instead forced to agree with that
+ * of the grouping set (and items not in the grouping set are skipped). The
+ * work of sorting the order of grouping set elements to match the ORDER BY if
+ * possible is done elsewhere.
  */
 static List *
-groupclause_apply_groupingset(PlannerInfo *root, List *gset)
+preprocess_groupclause(PlannerInfo *root, List *force)
 {
 	Query	   *parse = root->parse;
 	List	   *new_groupclause = NIL;
 	ListCell   *sl;
+	ListCell   *gl;
 
-	foreach(sl, gset)
+	/* For grouping sets, we need to force the ordering */
+	if (force)
 	{
-		Index		ref = lfirst_int(sl);
-		SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+		foreach(sl, force)
+		{
+			Index		ref = lfirst_int(sl);
+			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
+
+			new_groupclause = lappend(new_groupclause, cl);
+		}
 
-		new_groupclause = lappend(new_groupclause, cl);
+		return new_groupclause;
 	}
+
+	/* If no ORDER BY, nothing useful to do here */
+	if (parse->sortClause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
+
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+
+
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return list_copy(parse->groupClause);
+
+	/*
+	 * Add any remaining GROUP BY items to the new list.  We don't require a
+	 * complete match, because even partial match allows ORDER BY to be
+	 * implemented using incremental sort.  Also, give up if there are any
+	 * non-sortable GROUP BY items, since then there's no hope anyway.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
+
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
 	return new_groupclause;
 }
 
@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 			{
 				rollup = makeNode(RollupData);
 
-				rollup->groupClause = groupclause_apply_groupingset(root, gset);
+				rollup->groupClause = preprocess_groupclause(root, gset);
 				rollup->gsets_data = list_make1(gs);
 				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 														 rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 			Assert(gs->set != NIL);
 
-			rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
 			rollup->gsets_data = list_make1(gs);
 			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
 													 rollup->gsets_data,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 78489398294..2ba297c1172 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -426,7 +426,11 @@ struct PlannerInfo
 	 * items to be proven redundant, implying that there is only one group
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
-	 * nonempty processed_groupClause.
+	 * nonempty processed_groupClause.  Optimizer chooses specific order of
+	 * group-by clauses during the upper paths generation process, attempting
+	 * to use different strategies to minimize number of sorts or engage
+	 * incremental sort.  See preprocess_groupclause() and
+	 * get_useful_group_keys_orderings() for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 1b900fddf8e..5f2c0cf5786 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
    Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
    ->  Append
          ->  HashAggregate
-               Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
+               Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
                Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p1 pagg_tab_m
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
+               Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
                Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
          ->  HashAggregate
-               Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
+               Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
                Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
                ->  Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
 (15 rows)
-- 
2.39.3 (Apple Git-145)

v6-0003-Rename-PathKeyInfo-to-GroupByOrdering.patch (application/octet-stream)
From b4ccb48e68d595446f10c3fde8acf4f675728878 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 27 May 2024 14:57:43 +0300
Subject: [PATCH v6 3/5] Rename PathKeyInfo to GroupByOrdering

0452b461bc made optimizer explore alternative orderings of group-by pathkeys.
The PathKeyInfo data structure was used to store the particular ordering of
group-by pathkeys and corresponding clauses.  It turns out that PathKeyInfo
is not the best name for that purpose.  This commit renames this data structure
to GroupByOrdering, and revises its comment.

Discussion: https://postgr.es/m/db0fc3a4-966c-4cec-a136-94024d39212d%40postgrespro.ru
Reported-by: Tom Lane
Author: Andrei Lepikhov
Reviewed-by: Alexander Korotkov, Pavel Borisov
---
 src/backend/optimizer/path/pathkeys.c | 18 +++++++++---------
 src/backend/optimizer/plan/planner.c  |  8 ++++----
 src/include/nodes/pathnodes.h         | 13 ++++++++++---
 src/tools/pgindent/typedefs.list      |  2 +-
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 02e46a6eaaf..0a25b6384bb 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -450,7 +450,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -459,7 +459,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -471,7 +471,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -486,13 +486,13 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
 	Query	   *parse = root->parse;
 	List	   *infos = NIL;
-	PathKeyInfo *info;
+	GroupByOrdering *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -528,7 +528,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -553,7 +553,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -563,7 +563,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell   *lc;
 
 		/* Test consistency of info structures */
@@ -572,7 +572,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			ListCell   *lc1,
 					   *lc2;
 
-			info = lfirst_node(PathKeyInfo, lc);
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ea107c70cb8..084c6796d03 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6917,7 +6917,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6998,7 +6998,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7249,7 +7249,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7305,7 +7305,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 14ef296ab72..78489398294 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1468,14 +1468,21 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and the corresponding list of
+ * pathkeys.
+ *
+ * The elements of 'clauses' list should have the same order as the head of
+ * 'pathkeys' list.  The tleSortGroupRef of the clause should be equal to
+ * ec_sortref of the pathkey equivalence class.  If there are redundant
+ * clauses with the same tleSortGroupRef, they must be grouped together.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d427a1c16a5..4f57078d133 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1056,6 +1056,7 @@ GrantRoleStmt
 GrantStmt
 GrantTargetType
 Group
+GroupByOrdering
 GroupClause
 GroupPath
 GroupPathExtraData
@@ -2067,7 +2068,6 @@ PathClauseUsage
 PathCostComparison
 PathHashStack
 PathKey
-PathKeyInfo
 PathKeysComparison
 PathTarget
 PatternInfo
-- 
2.39.3 (Apple Git-145)

v6-0005-Teach-group_keys_reorder_by_pathkeys-about-redund.patch (application/octet-stream)
From e8736bbb1af07343d1f4736311703345cfcb11e4 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:13:11 +0300
Subject: [PATCH v6 5/5] Teach group_keys_reorder_by_pathkeys() about redundant
 SortGroupClause's

We don't have strict guarantees that there shouldn't be redundant
SortGroupClause's.  Although there is no confirmed way to generate
duplicate SortGroupClause's, this commit teaches
group_keys_reorder_by_pathkeys() to pull all the SortGroupClauses matching a
pathkey at once.

Discussion: https://postgr.es/m/a663f0f6-cbf6-49aa-af2e-234dc6768a07%40postgrespro.ru
Reported-by: Tom Lane
Author: Andrei Lepikhov, Alexander Korotkov
Reviewed-by: Pavel Borisov
---
 src/backend/optimizer/path/pathkeys.c | 41 ++++++++++++++++++---------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 416fc4e240b..3761cbb3cf6 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -401,7 +401,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	foreach(lc, pathkeys)
 	{
 		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		bool		found = false;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -416,28 +416,40 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		/*
 		 * Since 1349d27 pathkey coming from underlying node can be in the
 		 * root->group_pathkeys but not in the processed_groupClause. So, we
-		 * should be careful here.
+		 * should be careful here.  Also, there could be redundant
+		 * SortGroupClause's.  So, we need to pull out all the matching
+		 * SortGroupClause's, but ensure there is at least one.
 		 */
-		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
-											*group_clauses);
-		if (!sgc)
-			/* The grouping clause does not cover this pathkey */
-			break;
+		foreach_ptr(SortGroupClause, sgc, *group_clauses)
+		{
+			if (sgc->tleSortGroupRef == pathkey->pk_eclass->ec_sortref)
+			{
+				/*
+				 * Sort group clause should have an ordering operator as long
+				 * as there is an associated pathkey.
+				 */
+				Assert(OidIsValid(sgc->sortop));
 
-		/*
-		 * Sort group clause should have an ordering operator as long as there
-		 * is an associated pathkey.
-		 */
-		Assert(OidIsValid(sgc->sortop));
+				new_group_clauses = lappend(new_group_clauses, sgc);
+				found = true;
+			}
+		}
+
+		/* Check if the grouping clause does not cover this pathkey */
+		if (!found)
+			break;
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
-		new_group_clauses = lappend(new_group_clauses, sgc);
+
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
 	*group_clauses = list_concat_unique_ptr(new_group_clauses,
@@ -544,6 +556,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 #endif
+
 	return infos;
 }
 
-- 
2.39.3 (Apple Git-145)
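A minimal C sketch of the behavior 0005 gives group_keys_reorder_by_pathkeys(). Again a toy model with invented names: plain ints stand in for the pathkeys' ec_sortref and the clauses' tleSortGroupRef, in place of the planner's List machinery.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * Walk the pathkeys in order and, for each one, append *every* grouping
 * clause whose reference matches the pathkey's reference, so redundant
 * duplicates stay grouped together.  Stop at the first pathkey with no
 * matching clause, mirroring "the grouping clause does not cover this
 * pathkey".
 *
 * Returns the number of clauses written to 'out'.
 */
static size_t
pull_matching_clauses(const int *pathkey_refs, size_t n_pathkeys,
                      const int *clause_refs, size_t n_clauses,
                      int *out)
{
    size_t      n_out = 0;

    for (size_t i = 0; i < n_pathkeys; i++)
    {
        bool        found = false;

        for (size_t j = 0; j < n_clauses; j++)
        {
            if (clause_refs[j] == pathkey_refs[i])
            {
                out[n_out++] = clause_refs[j];  /* duplicates included */
                found = true;
            }
        }
        if (!found)
            break;              /* pathkey not covered by any clause */
    }
    return n_out;
}
```

With pathkey refs (2, 1) and clause refs (1, 2, 2) — a redundant clause for ref 2 — the result is (2, 2, 1): both ref-2 clauses are pulled together before moving on to the next pathkey, which is the invariant the GroupByOrdering comment in 0003 documents.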

v6-0001-Fix-asymmetry-in-setting-EquivalenceClass.ec_sort.patch (application/octet-stream)
From c04fd80a0b88ebd12f21d998c39e661eef082f07 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 21 May 2024 03:10:34 +0300
Subject: [PATCH v6 1/5] Fix asymmetry in setting EquivalenceClass.ec_sortref

0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the
EquivalenceClass.ec_sortref's needed to explore alternative GROUP BY ordering
specifically during building pathkeys by the list of grouping clauses.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Reported-by: Tom Lane
Author: Andrei Lepikhov
Reviewed-by: Alexander Korotkov, Pavel Borisov
---
 src/backend/optimizer/path/equivclass.c  | 13 +------
 src/backend/optimizer/path/pathkeys.c    | 18 ++++++++-
 src/backend/optimizer/plan/planner.c     | 16 ++++++--
 src/include/optimizer/paths.h            |  3 +-
 src/test/regress/expected/aggregates.out | 47 ++++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 14 +++++++
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 21ce1ae2e13..51d806326eb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-			{
-				/*
-				 * Match!
-				 *
-				 * Copy the sortref if it wasn't set yet.  That may happen if
-				 * the ec was constructed from a WHERE clause, i.e. it doesn't
-				 * have a target reference at all.
-				 */
-				if (cur_ec->ec_sortref == 0 && sortref > 0)
-					cur_ec->ec_sortref = sortref;
-				return cur_ec;
-			}
+				return cur_ec;	/* Match! */
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b258cbef92..df966b18f34 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 													&sortclauses,
 													tlist,
 													false,
-													&sortable);
+													&sortable,
+													false);
 	/* It's caller error if not all clauses were sortable */
 	Assert(sortable);
 	return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true, also set the pathkey's
+ * EquivalenceClass.ec_sortref unless it's already initialized.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									   List **sortclauses,
 									   List *tlist,
 									   bool remove_redundant,
-									   bool *sortable)
+									   bool *sortable,
+									   bool set_ec_sortref)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
 										   true);
+		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+		{
+			/*
+			 * Copy the sortref if it hasn't been set yet.  That may happen if
+			 * the EquivalenceClass was constructed from a WHERE clause, i.e.
+			 * it doesn't have a target reference at all.
+			 */
+			pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+		}
 
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 032818423f6..ea107c70cb8 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 		 */
 		bool		sortable;
 
+		/*
+		 * Convert group clauses into pathkeys.  Set the ec_sortref fields of
+		 * the underlying EquivalenceClasses if they are not set yet.
+		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses_extended(root,
 												   &root->processed_groupClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   true);
 		if (!sortable)
 		{
 			/* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &root->processed_distinctClause,
 												   tlist,
 												   true,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->distinct_pathkeys = NIL;
 	}
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 												   &groupClauses,
 												   tlist,
 												   false,
-												   &sortable);
+												   &sortable,
+												   false);
 		if (!sortable)
 			root->setop_pathkeys = NIL;
 	}
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 																 &wc->partitionClause,
 																 tlist,
 																 true,
-																 &sortable);
+																 &sortable,
+																 false);
 
 		Assert(sortable);
 	}
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 914d9bdef58..5e88c0224a4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 													List **sortclauses,
 													List *tlist,
 													bool remove_redundant,
-													bool *sortable);
+													bool *sortable,
+													bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 2442342e9d1..1c1ca7573ad 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys built in a subtree can be used to optimize GROUP BY clause
+-- ordering.  Also check here that the result doesn't depend on the initial
+-- clause order in the GROUP BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 61a3424c845..1a18ca3d8fe 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys built in a subtree can be used to optimize GROUP BY clause
+-- ordering.  Also check here that the result doesn't depend on the initial
+-- clause order in the GROUP BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
-- 
2.39.3 (Apple Git-145)
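
For readers less familiar with the planner internals, here is a minimal model of the sortref-propagation rule the patch moves into make_pathkeys_for_sortclauses_extended().  The structures and function below are hypothetical simplifications for illustration only, not the actual PostgreSQL C API: an EquivalenceClass built from a WHERE clause starts with ec_sortref == 0 (unset), and the sortref is copied from the grouping clause only when set_ec_sortref is requested and the field is still unset.

```python
from dataclasses import dataclass


@dataclass
class EquivalenceClass:
    """Simplified stand-in for the planner's EquivalenceClass."""
    ec_sortref: int = 0  # 0 means "no target reference assigned yet"


@dataclass
class PathKey:
    pk_eclass: EquivalenceClass


def make_pathkeys_for_sortclauses_extended(sortclauses, set_ec_sortref):
    """Simplified model of the new behavior: build one pathkey per
    (tleSortGroupRef, eclass) pair and, when set_ec_sortref is true,
    copy the clause's sortref into an EquivalenceClass whose ec_sortref
    is still unset, e.g. one constructed from a WHERE clause."""
    pathkeys = []
    for sortref, eclass in sortclauses:
        if set_ec_sortref and eclass.ec_sortref == 0:
            eclass.ec_sortref = sortref
        pathkeys.append(PathKey(pk_eclass=eclass))
    return pathkeys
```

Under this model, the GROUP BY path (set_ec_sortref=True) fills in missing sortrefs, while the DISTINCT, set-operation, and window paths (set_ec_sortref=False) leave eclasses untouched, matching the call sites changed in planner.c.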