diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 28e3aed..274eaf8 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -871,6 +871,143 @@ get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok)
 }
 
 /*
+ * identify_key_vars
+ *		Returns a Bitmapset with bits set to mark which 0-based varlist items
+ *		belong to 'relid's PRIMARY KEY.
+ *
+ *	On successful identification of primary key columns 'constraintOid' is set
+ *	to the OID of the primary key constraint. On failure to match to *all*
+ *	primary key columns, NULL is returned and 'constraintOid' is left
+ *	unchanged.
+ */
+Bitmapset *
+identify_key_vars(Oid relid, Index varno, Index varlevelsup, List *varlist,
+				  Oid *constraintOid)
+{
+	Relation	pg_constraint;
+	HeapTuple	tuple;
+	SysScanDesc scan;
+	ScanKeyData skey[1];
+	Bitmapset	*varlistmatches;
+
+	/* Scan pg_constraint for constraints of the target rel */
+	pg_constraint = heap_open(ConstraintRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true,
+							  NULL, 1, skey);
+
+	varlistmatches = NULL;
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+		Datum		adatum;
+		bool		isNull;
+		ArrayType  *arr;
+		int16	   *attnums;
+		int			numkeys;
+		int			i;
+
+		/*
+		 * For now we must ignore anything apart from PRIMARY KEY constraints.
+		 * Technically we could look at UNIQUE indexes too, however we'd also
+		 * have to ensure that each column of the unique index has a NOT NULL
+		 * constraint, however since NOT NULL constraints currently don't have
+		 * a pg_constraint entry this means that we're unable to track the
+		 * dependency on the NOT NULL in order to invalidate any cached query
+		 * plans in the event that someone drops the NOT NULL constraint. In
+		 * any case it seems strange to go to more lengths here than what is
+		 * done by check_functional_grouping().
+		 */
+		if (con->contype != CONSTRAINT_PRIMARY)
+			continue;
+
+		/*
+		 * Constraint must be non-deferrable, but no point in looking for
+		 * another primary key, as there can only be one, so break;
+		 */
+		if (con->condeferrable)
+			break;
+
+		/* Extract the conkey array, ie, attnums of PK's columns */
+		adatum = heap_getattr(tuple, Anum_pg_constraint_conkey,
+							  RelationGetDescr(pg_constraint), &isNull);
+		if (isNull)
+			elog(ERROR, "null conkey for constraint %u",
+				 HeapTupleGetOid(tuple));
+		arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+		numkeys = ARR_DIMS(arr)[0];
+		if (ARR_NDIM(arr) != 1 ||
+			numkeys < 0 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != INT2OID)
+			elog(ERROR, "conkey is not a 1-D smallint array");
+		attnums = (int16 *) ARR_DATA_PTR(arr);
+
+		for (i = 0; i < numkeys; i++)
+		{
+			AttrNumber	attnum = attnums[i];
+			ListCell   *lc;
+			bool found_col = false;
+			int varlistidx = 0;
+
+			foreach(lc, varlist)
+			{
+				Var		   *gvar = (Var *) lfirst(lc);
+
+				if (IsA(gvar, Var) &&
+					gvar->varno == varno &&
+					gvar->varlevelsup == varlevelsup &&
+					gvar->varattno == attnum)
+				{
+					varlistmatches = bms_add_member(varlistmatches, varlistidx);
+					found_col = true;
+					/*
+					 * We can break out of the loop here, as no duplicate Vars
+					 * should exist as these will have been removed during
+					 * parse analysis.
+					 */
+					break;
+				}
+				varlistidx++;
+			}
+
+			if (!found_col)
+			{
+				/*
+				 * No match found for this index key. Since the relation has
+				 * only one PRIMARY KEY we can skip looking through any other
+				 * constraints.
+				 */
+				varlistmatches = NULL;
+				break;
+			}
+		}
+
+		/*
+		 * Did we match all PRIMARY KEY Vars? If so we can return this match.
+		 * There's no need to look for a better match as a relation can only
+		 * have one PRIMARY KEY constraint.
+		 */
+		if (i == numkeys)
+			*constraintOid = HeapTupleGetOid(tuple);
+
+		break;
+	}
+
+	systable_endscan(scan);
+
+	heap_close(pg_constraint, AccessShareLock);
+
+	return varlistmatches;
+}
+
+/*
  * Determine whether a relation can be proven functionally dependent on
  * a set of grouping columns.  If so, return TRUE and add the pg_constraint
  * OIDs of the constraints needed for the proof to the *constraintDeps list.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fed1acc..4684ba2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -21,6 +21,7 @@
 #include "access/htup_details.h"
 #include "access/parallel.h"
 #include "access/xact.h"
+#include "catalog/pg_constraint.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
 #include "foreign/fdwapi.h"
@@ -3160,6 +3161,162 @@ limit_needed(Query *parse)
 	return false;				/* don't need a Limit plan node */
 }
 
+/*
+ * remove_useless_groupby_columns
+ *		Removes columns from the GROUP BY clause which are redundant due to
+ *		being functionally dependant on one or more other column which are
+ *		present.
+ *
+ * We do support non-aggregated columns in the query targetlist which are
+ * functionally dependant on the primary key, but not all relational databases
+ * allow this, so it can be fairly common that queries are written to have the
+ * GROUP BY clause include all of the columns which are not aggregated in the
+ * query's targetlist. These columns have no need to be there as long as the
+ * columns which make up the table's PRIMARY KEY are present in the GROUP BY
+ * clause. Here we test to see if the primary key columns are present for each
+ * relation which make up the columns of the GROUP BY clause, and if so we
+ * remove any which are not part of the primary key.
+ *
+ * In reality we could do more than just the primary key columns here as UNIQUE
+ * indexes with NOT NULL constraints on the columns allow us to also determine
+ * functional dependencies. However we mustn't go any further than what's done
+ * by check_functional_grouping().
+ */
+static void
+remove_useless_groupby_columns(PlannerInfo *root)
+{
+	List		  **relvarlist;
+	Bitmapset	  **surplusvars;
+	ListCell	   *lc;
+	int				relid;
+	bool			anysurplus = false;
+	Query		   *parse = root->parse;
+
+	if (parse->groupingSets)
+		return;
+
+	/* Nothing do to when there's only 1 group by expression */
+	if (list_length(parse->groupClause) < 2)
+		return;
+
+	relvarlist = (List **) palloc0(sizeof(List *) * (list_length(parse->rtable) + 1));
+	surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) * (list_length(parse->rtable) + 1));
+
+	/*
+	 * Pre-process the GROUP BY clause Vars from each relation into
+	 * 'relvarlist' indexed by the relid
+	 */
+	foreach (lc, parse->groupClause)
+	{
+		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+		TargetEntry *tle;
+		Var *var;
+
+		tle = get_sortgroupclause_tle(sgc, parse->targetList);
+		var = (Var *) tle->expr;
+
+		/*
+		 * XXX it may be useful to also look at non-Vars here. If the clause
+		 * is an Expr containing Vars from a single relation then we may also
+		 * be able to remove it, providing that we find all PRIMARY KEY columns
+		 * elsewhere in the GROUP BY clause. Perhaps we can improve on this
+		 * later, but for now it seems better to keep it simple.
+		 */
+		if (!IsA(var, Var))
+			continue;
+
+		Assert(var->varno <= list_length(parse->rtable));
+		relvarlist[var->varno] = lappend(relvarlist[var->varno], var);
+	}
+
+	/*
+	 * Look at each rtable relation and check if any Vars were found to belong
+	 * to this relation by the logic above. Note that since identify_key_vars()
+	 * only concerns itself with PRIMARY KEY constraints to prove uniqueness,
+	 * and that since PRIMARY KEYs cannot be based on expressions, then we need
+	 * only concern ourselves here with plain Vars.
+	 */
+	relid = 1;
+	foreach(lc, parse->rtable)
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+		List *varlist = relvarlist[relid];
+		Bitmapset *varlistmatches;
+		Oid		  constraintOid;
+		ListCell *lc2;
+
+		/*
+		 * Clearly there's no room for removing any items from the GROUP BY
+		 * clause there are fewer than two Vars belonging to this rel.
+		 */
+		if (varlist == NULL || list_length(varlist) < 2)
+			continue;
+
+		varlistmatches = identify_key_vars(rte->relid, relid, 0, varlist,
+										   &constraintOid);
+
+		/*
+		 * If we have some varlist matches and varlist has additional columns,
+		 * then each of these additional column may be removed from the
+		 * GROUP BY clause. We'll mark the varattno of each variable that's not
+		 * needed.
+		 */
+		if (varlistmatches != NULL &&
+			bms_num_members(varlistmatches) < list_length(varlist))
+		{
+			int varidx = 0;
+			foreach(lc2, varlist)
+			{
+				Var *var = (Var *) lfirst(lc2);
+
+				if (!bms_is_member(varidx, varlistmatches))
+				{
+					surplusvars[relid] = bms_add_member(surplusvars[relid],
+														var->varattno);
+				}
+
+				varidx++;
+			}
+
+			/*
+			 * Mark that the GROUP BY clause needs to be rebuilt. We'll do this
+			 * just once when we've completely processing any other relation's
+			 * columns which are present in the GROUP BY clause.
+			 */
+			anysurplus = true;
+			parse->constraintDeps = lappend_oid(parse->constraintDeps,
+												constraintOid);
+		}
+		relid++;
+	}
+
+	/*
+	 * If we found any surplus Vars in the GROUP BY clause, then we'll build
+	 * a new GROUP BY clause without these surplus Vars.
+	 */
+	if (anysurplus)
+	{
+		List *new_groupby = NIL;
+
+		foreach (lc, root->parse->groupClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			TargetEntry *tle;
+			Var *var;
+
+			tle = get_sortgroupclause_tle(sgc, root->parse->targetList);
+			var = (Var *) tle->expr;
+
+			if (!IsA(var, Var) ||
+				!bms_is_member(var->varattno, surplusvars[var->varno]))
+				new_groupby = lappend(new_groupby, sgc);
+
+			/* XXX do we to alter tleSortGroupRef to remove gaps? */
+		}
+		root->parse->groupClause = new_groupby;
+	}
+}
+
 
 /*
  * preprocess_groupclause - do preparatory work on GROUP BY clause
@@ -3205,6 +3362,12 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 		return new_groupclause;
 	}
 
+	/*
+	 * Remove any GROUP BY clause items which are functionally dependent on
+	 * other clause items
+	 */
+	remove_useless_groupby_columns(root);
+
 	/* If no ORDER BY, nothing useful to do here */
 	if (parse->sortClause == NIL)
 		return parse->groupClause;
diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h
index 07dbcea..934e8db 100644
--- a/src/include/catalog/pg_constraint.h
+++ b/src/include/catalog/pg_constraint.h
@@ -250,6 +250,9 @@ extern void AlterConstraintNamespaces(Oid ownerId, Oid oldNspId,
 extern Oid	get_relation_constraint_oid(Oid relid, const char *conname, bool missing_ok);
 extern Oid	get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok);
 
+extern Bitmapset *identify_key_vars(Oid relid, Index varno, Index varlevelsup,
+									List *varlist, Oid *constraintOid);
+
 extern bool check_functional_grouping(Oid relid,
 						  Index varno, Index varlevelsup,
 						  List *grouping_columns,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 64f046e..1c6c8e6 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3918,19 +3918,19 @@ select d.* from d left join (select distinct * from b) s
 -- not in the join condition
 explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
-  on d.a = s.id;
-                 QUERY PLAN                  
----------------------------------------------
+  on d.a = s.c_id;
+              QUERY PLAN               
+---------------------------------------
  Merge Left Join
-   Merge Cond: (d.a = s.id)
+   Merge Cond: (d.a = s.c_id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
    ->  Sort
-         Sort Key: s.id
+         Sort Key: s.c_id
          ->  Subquery Scan on s
                ->  HashAggregate
-                     Group Key: b.id, b.c_id
+                     Group Key: b.id
                      ->  Seq Scan on b
 (11 rows)
 
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 0358d00..e11e0a5 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1266,7 +1266,7 @@ select d.* from d left join (select distinct * from b) s
 -- not in the join condition
 explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
-  on d.a = s.id;
+  on d.a = s.c_id;
 
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)